From 00a57277fd296ad421a9104806ce18549b48104c Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Wed, 8 Jun 2022 14:26:36 -0800
Subject: [PATCH 01/19] Refactoring SGA

To reduce the tight coupling between 'Versioned' classes and their unversioned base, the unversioned base no longer contains any version lookups. Instead, the APIvX protocol should be used to get/specify the types that each version is expected to provide.

This refactoring breaks a lot of the old codebase, which expected most versioned types to 'figure it out' when passed the version and the stream.

The new APIvX format should ensure that:

A) Modules that want to be vX-compatible define their own 'version' of the vX classes (vX defines a helper to determine whether a module has been properly defined as an API).

A.1) While modules can be APIs, the preferred method is to use APIvX objects, while following the vX module syntax format for imports.

B) It is simple to define arbitrary vX formats: create a class with class variables and assign the classes to their appropriate hooks.

C) Registering versions is done in a single location. Previously, each individual class had to update a lookup table with its versioned classes, and there was only one global table (per class), meaning swapping version contexts was impossible. There is still one global lookup table, but it is now easier to pass in a version lookup table without worrying about keeping other tables up to date.

This is kind of a rant, but this is a massive API change. With tests not fully implemented for SGA, a lot of functionality is broken, and the existing tests need to be refactored to account for the changes.
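---
A minimal sketch of the intended call flow under the new layout, mirroring unpack_archive in the new src/relic/sga/io.py; the read_any_sga helper and "example.sga" path are illustrative, not part of this patch:

    from typing import BinaryIO

    from relic.sga import APIS  # {api.version: api}, built in relic/sga/__init__.py
    from relic.sga.io import unpack_archive

    def read_any_sga(stream: BinaryIO):
        # The version stamped in the stream selects an APIvX object once, at
        # the entry point; every versioned type is then reached through that
        # object instead of through a per-class global lookup table.
        return unpack_archive(stream, sparse=True, versions=APIS, validate=True)

    with open("example.sga", "rb") as stream:
        archive = read_any_sga(stream)

Point B then amounts to publishing a class whose class variables point at one version's own types; the exact APIvX hook names are assumed here from how io.py and __init__.py consume them:

    class APIv99:                 # hypothetical format, illustration only
        version = None            # would be an ArchiveVersion value
        ArchiveHeader = None      # would be v99's header class
        Archive = None            # would be v99's archive class

 src/relic/chunky/chunk/header.py | 2 +- src/relic/chunky/chunky/header.py | 2 +- src/relic/sga/__init__.py | 38 +- src/relic/sga/abc_.py | 564 ++++++++++++++++++ src/relic/sga/archive/__init__.py | 16 - src/relic/sga/archive/archive.py | 83 --- src/relic/sga/archive/header.py | 246 -------- src/relic/sga/checksums.py | 25 + src/relic/sga/common.py | 15 +- src/relic/sga/file/__init__.py | 11 - src/relic/sga/file/file.py | 85 --- src/relic/sga/file/header.py | 151 ----- src/relic/sga/folder/__init__.py | 10 - src/relic/sga/folder/folder.py | 68 --- src/relic/sga/folder/header.py | 61 -- src/relic/sga/hierarchy.py | 77 --- src/relic/sga/io.py | 78 +++ src/relic/sga/protocols.py | 107 ++++ src/relic/sga/toc/__init__.py | 13 - src/relic/sga/toc/toc.py | 50 -- src/relic/sga/toc/toc_headers.py | 62 -- src/relic/sga/toc/toc_ptr.py | 108 ---- src/relic/sga/v2.py | 151 +++++ src/relic/sga/v5.py | 157 +++++ src/relic/sga/v9.py | 154 +++++ src/relic/sga/vX.py | 39 ++ src/relic/sga/vdrive/__init__.py | 10 - src/relic/sga/vdrive/header.py | 65 -- src/relic/sga/vdrive/virtual_drive.py | 67 --- src/scripts/dump_sga.py | 9 +- src/scripts/universal/sga/unpack.py | 5 +- tests/relic/sga/archive/test_archive.py | 59 +- .../relic/sga/archive/test_archive_header.py | 137 +++-- tests/relic/sga/datagen.py | 104 ++-- tests/relic/sga/file/test_file_header.py | 41 +- tests/relic/sga/test_vX_interface.py | 40 ++ 36 files changed, 1530 insertions(+), 1380 deletions(-) create mode 100644 src/relic/sga/abc_.py delete mode 100644 src/relic/sga/archive/__init__.py delete mode 100644 src/relic/sga/archive/archive.py delete mode 100644 src/relic/sga/archive/header.py create mode 100644 src/relic/sga/checksums.py delete mode 100644 src/relic/sga/file/__init__.py delete mode 100644 src/relic/sga/file/file.py delete mode 100644 src/relic/sga/file/header.py delete mode 100644 src/relic/sga/folder/__init__.py delete mode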
100644 src/relic/sga/folder/folder.py delete mode 100644 src/relic/sga/folder/header.py delete mode 100644 src/relic/sga/hierarchy.py create mode 100644 src/relic/sga/io.py create mode 100644 src/relic/sga/protocols.py delete mode 100644 src/relic/sga/toc/__init__.py delete mode 100644 src/relic/sga/toc/toc.py delete mode 100644 src/relic/sga/toc/toc_headers.py delete mode 100644 src/relic/sga/toc/toc_ptr.py create mode 100644 src/relic/sga/v2.py create mode 100644 src/relic/sga/v5.py create mode 100644 src/relic/sga/v9.py create mode 100644 src/relic/sga/vX.py delete mode 100644 src/relic/sga/vdrive/__init__.py delete mode 100644 src/relic/sga/vdrive/header.py delete mode 100644 src/relic/sga/vdrive/virtual_drive.py create mode 100644 tests/relic/sga/test_vX_interface.py diff --git a/src/relic/chunky/chunk/header.py b/src/relic/chunky/chunk/header.py index 68c84ff..fe78790 100644 --- a/src/relic/chunky/chunk/header.py +++ b/src/relic/chunky/chunk/header.py @@ -8,7 +8,7 @@ from serialization_tools.vstruct import VStruct from ..chunky.header import ChunkyVersion -from ...common import VersionLike, VersionError +from relic.sga.common.common import VersionLike, VersionError class ChunkType(Enum): diff --git a/src/relic/chunky/chunky/header.py b/src/relic/chunky/chunky/header.py index 05b75b4..fba28e0 100644 --- a/src/relic/chunky/chunky/header.py +++ b/src/relic/chunky/chunky/header.py @@ -6,7 +6,7 @@ from serialization_tools.magic import MagicWordIO, MagicWord from serialization_tools.structx import Struct -from relic.common import VersionEnum, Version, VersionLike, VersionError +from relic.sga.common.common import VersionEnum, Version, VersionLike, VersionError ChunkyVersionLayout = Struct("< 2L") diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py index 70428ba..2d5926d 100644 --- a/src/relic/sga/__init__.py +++ b/src/relic/sga/__init__.py @@ -1,19 +1,21 @@ -from .archive import * -from .file import * -from .folder import * -from .toc import * -from .vdrive import * -from . import common, hierarchy, writer -from . import archive, file, folder, toc, vdrive +# +# from . 
import common, hierarchy, writer +# # from .common import vdrive, archive, folder, file, toc +# +# # __all__ = [ +# # "common", +# # "hierarchy", +# # "writer", +# # ] +# +# # __all__.extend(archive.__all__) +# # __all__.extend(file.__all__) +# # __all__.extend(folder.__all__) +# # __all__.extend(toc.__all__) +# # __all__.extend(vdrive.__all__) +from relic.sga.v2 import APIv2 +from relic.sga.v5 import APIv5 +from relic.sga.v9 import APIv9 -__all__ = [ - "common", - "hierarchy", - "writer", -] - -__all__.extend(archive.__all__) -__all__.extend(file.__all__) -__all__.extend(folder.__all__) -__all__.extend(toc.__all__) -__all__.extend(vdrive.__all__) +__APIS = [APIv2,APIv5,APIv9] +APIS = {api.version:api for api in __APIS} \ No newline at end of file diff --git a/src/relic/sga/abc_.py b/src/relic/sga/abc_.py new file mode 100644 index 0000000..f1ce99d --- /dev/null +++ b/src/relic/sga/abc_.py @@ -0,0 +1,564 @@ +from __future__ import annotations + +import zlib +from abc import ABC +from dataclasses import dataclass +from pathlib import PurePosixPath +from typing import List, BinaryIO, Optional, Dict, ClassVar, Tuple, Type + +from serialization_tools.ioutil import Ptr, WindowPtr +from serialization_tools.size import KiB +from serialization_tools.structx import Struct + +# import relic.sga.io +from relic.common import VersionLike +from relic.sga.common import ArchiveRange, ArchiveVersion +# from relic.sga.io import walk +from relic.sga.protocols import ArchiveHeader, Archive, FileCollection, FolderCollection, Folder, File, VirtualDrive, ArchiveWalk + +_NULL = b"\0" +_BUFFER_SIZE = 64 * KiB + + +def walk(self): + raise NotImplementedError # Currently causes cyclic dependencies; needs a fix + + +@dataclass +class ArchiveTableOfContentsABC: + drives: List[VirtualDriveABC] + folders: List[FolderABC] + files: List[FileABC] + names: Dict[int, str] + + @classmethod + def create(cls, toc_headers: ArchiveTableOfContentsHeadersABC) -> ArchiveTableOfContentsABC: + + drives = [VirtualDriveABC.create(header) for header in toc_headers.drives] + folders = [FolderABC.create(header) for header in toc_headers.folders] + files = [FileABC.create(header) for header in toc_headers.files] + + return ArchiveTableOfContentsABC(drives, folders, files, toc_headers.names) + + def load_data(self, stream: BinaryIO): + for _ in self.files: + _.load_data(stream) + + def load_toc(self): + for _ in self.drives: + _.load_toc(self) + for _ in self.folders: + _.load_toc(self) + for _ in self.files: + _.load_toc(self) + + def build_tree(self): + for _ in self.drives: + _.build_tree() + + +@dataclass +class ArchiveTableOfContentsHeadersABC: + drives: List[VirtualDriveHeaderABC] + folders: List[FolderHeaderABC] + files: List[FileHeaderABC] + names: Dict[int, str] + VDRIVE_HEADER_CLS: ClassVar[Type[VirtualDriveHeaderABC]] + FOLDER_HEADER_CLS: ClassVar[Type[FolderHeaderABC]] + FILE_HEADER_CLS: ClassVar[Type[FileHeaderABC]] + + @classmethod + def old_unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC, version: VersionLike = None) -> ArchiveTableOfContentsHeadersABC: + version = version or ptr.version # abusing the fact that the classes know their own version to avoid explicitly passing it in + + local_ptr = ptr.virtual_drive_ptr + with local_ptr.stream_jump_to(stream) as handle: + virtual_drives = [VirtualDriveHeaderABC.old_unpack(handle, version) for _ in range(local_ptr.count)] + + local_ptr = ptr.folder_ptr + with local_ptr.stream_jump_to(stream) as handle: + folders = [FolderHeaderABC.old_unpack(handle, version) for _ 
in range(local_ptr.count)] + + local_ptr = ptr.file_ptr + with local_ptr.stream_jump_to(stream) as handle: + files = [FileHeaderABC.old_unpack(handle, version) for _ in range(local_ptr.count)] + + # This gets a bit weird + local_ptr = ptr.name_ptr + names: Dict[int, str] = {} + with local_ptr.stream_jump_to(stream) as handle: + start = stream.tell() # use stream to avoid invalidating window + while len(names) < local_ptr.count: + remaining = local_ptr.count - len(names) + current = stream.tell() # Get relative pos to start + buffer = handle.read(_BUFFER_SIZE) + terminal_null = buffer.endswith(_NULL) + parts = buffer.split(_NULL, remaining) + + offset = 0 + for i, p in enumerate(parts): + if i == len(parts) - 1: + break + names[current - start + offset] = p.decode("ascii") + offset += len(p) + 1 # +1 to include the null terminator + + if not terminal_null: + stream.seek(current + offset) + + return ArchiveTableOfContentsHeadersABC(virtual_drives, folders, files, names) + + @classmethod + def unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC) -> ArchiveTableOfContentsHeadersABC: + local_ptr = ptr.virtual_drive_ptr + with local_ptr.stream_jump_to(stream) as handle: + virtual_drives = [cls.VDRIVE_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] + + local_ptr = ptr.folder_ptr + with local_ptr.stream_jump_to(stream) as handle: + folders = [cls.FOLDER_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] + + local_ptr = ptr.file_ptr + with local_ptr.stream_jump_to(stream) as handle: + files = [cls.FILE_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] + + # This gets a bit weird + local_ptr = ptr.name_ptr + names: Dict[int, str] = {} + with local_ptr.stream_jump_to(stream) as handle: + start = stream.tell() # use stream to avoid invalidating window + while len(names) < local_ptr.count: + remaining = local_ptr.count - len(names) + current = stream.tell() # Get relative pos to start + buffer = handle.read(_BUFFER_SIZE) + terminal_null = buffer.endswith(_NULL) + parts = buffer.split(_NULL, remaining) + + offset = 0 + for i, p in enumerate(parts): + if i == len(parts) - 1: + break + names[current - start + offset] = p.decode("ascii") + offset += len(p) + 1 # +1 to include the null terminator + + if not terminal_null: + stream.seek(current + offset) + + return ArchiveTableOfContentsHeadersABC(virtual_drives, folders, files, names) + + +@dataclass +class TocItemPtrABC(Ptr): + def __init__(self, offset: int, count: int, whence: int = 0): + super().__init__(offset, whence) + self.count = count + + +@dataclass +class ArchiveTableOfContentsPtrABC: + # Virtual Drives (offset, count), Folder (offset, count), File (offset, count), Names (offset, count) + LAYOUT: ClassVar[Struct] + virtual_drive_ptr: TocItemPtrABC + folder_ptr: TocItemPtrABC + file_ptr: TocItemPtrABC + name_ptr: TocItemPtrABC + + @property + def version(self) -> ArchiveVersion: + raise NotImplementedError + + @classmethod + def _unpack_tuple(cls, stream: BinaryIO) -> Tuple[TocItemPtrABC, TocItemPtrABC, TocItemPtrABC, TocItemPtrABC]: + vd_offset, vd_count, fold_offset, fold_count, file_offset, file_count, name_offset, name_count = cls.LAYOUT.unpack_stream(stream) + vd_ptr = TocItemPtrABC(vd_offset, vd_count) + fold_ptr = TocItemPtrABC(fold_offset, fold_count) + file_ptr = TocItemPtrABC(file_offset, file_count) + name_ptr = TocItemPtrABC(name_offset, name_count) + return vd_ptr, fold_ptr, file_ptr, name_ptr + + def _pack_tuple(self) -> Tuple[int, int, int, int, int, int, int, int]: + return
self.virtual_drive_ptr.offset, self.virtual_drive_ptr.count, \ + self.folder_ptr.offset, self.folder_ptr.count, \ + self.file_ptr.offset, self.file_ptr.count, \ + self.name_ptr.offset, self.name_ptr.count + + @classmethod + def unpack_version(cls, stream: BinaryIO, version: VersionLike) -> 'ArchiveTableOfContentsPtrABC': + raise TypeError("Use APIs[version].ArchiveTableOfContentsPtr.unpack(stream)") + # toc_ptr_class = _ToCPtr_VERSION_MAP.get(version) + # + # if not toc_ptr_class: + # raise NotImplementedError(version) + # + # return relic.sga.io.unpack_archive(stream) + + @classmethod + def unpack(cls, stream: BinaryIO) -> 'ArchiveTableOfContentsPtrABC': + args = cls._unpack_tuple(stream) + return cls(*args) + + def pack(self, stream: BinaryIO) -> int: + args = self._pack_tuple() + return self.LAYOUT.pack_stream(stream, *args) + + def __str__(self): + parts = [f"{k}={v}" for k, v in self.__dict__.items()] + return f"{self.__class__.__name__}({', '.join(parts)})" + + def __repr__(self): + return str(self) + + +@dataclass +class ArchiveHeaderABC(ArchiveHeader, ABC): + name: str + toc_ptr: Ptr + data_ptr: WindowPtr + + +@dataclass +class ArchiveABC(Archive): + header: ArchiveHeader + """Sparse represents whether data was loaded on creation.""" + _sparse: bool + + def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse: bool): + self.header = header + self._sparse = _sparse + self.drives = drives + + def walk(self) -> ArchiveWalk: + return walk(self) + + TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveTableOfContentsPtrABC + TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeadersABC + TOC_CLS: ClassVar[Type[ArchiveTableOfContentsABC]] = ArchiveTableOfContentsABC + + @classmethod + def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): + # version = header.version + with header.toc_ptr.stream_jump_to(stream) as handle: + toc_ptr = cls.TOC_PTR_CLS.unpack(handle) + toc_headers = cls.TOC_HEADERS_CLS.unpack(handle, toc_ptr) + toc = cls.TOC_CLS.create(toc_headers) + + toc.load_toc() + toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up + if not sparse: + with header.data_ptr.stream_jump_to(stream) as handle: + toc.load_data(handle) + + return cls(header, toc.drives, sparse) + + def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: + raise NotImplementedError + + +@dataclass +class FileABC(File): + header: FileHeaderABC + name: str + data: Optional[bytes] = None + _decompressed: bool = False + parent_folder: Optional[FolderABC] = None + parent_drive: Optional[VirtualDriveABC] = None + + @property + def data_loaded(self) -> bool: + return self.data is not None + + @property + def expects_decompress(self) -> bool: + return self.header.compressed + + @property + def decompressed(self) -> bool: + if self.data_loaded: + return self._decompressed or not self.expects_decompress + else: + return False + + @property + def full_path(self) -> PurePosixPath: + if self.parent_folder: + return self.parent_folder.full_path / self.name + elif self.parent_drive: + return self.parent_drive.full_path / self.name + else: + return PurePosixPath(self.name) + + @classmethod + def create(cls, header: FileHeaderABC) -> FileABC: + _decompressed = False + # noinspection PyTypeChecker + return FileABC(header, None, None, _decompressed) + + def load_name_from_lookup(self, name_lookup: Dict[int, str]): + self.name = name_lookup[self.header.name_sub_ptr.offset] + + def load_toc(self, toc: 
ArchiveTableOfContentsABC): + self.load_name_from_lookup(toc.names) + + def read_data(self, stream: BinaryIO, decompress: bool = False) -> bytes: + with self.header.data_sub_ptr.stream_jump_to(stream) as handle: + buffer = handle.read(self.header.compressed_size) + if decompress and self.expects_decompress: + return zlib.decompress(buffer) + else: + return buffer + + def load_data(self, stream: BinaryIO, decompress: bool = False): + self.data = self.read_data(stream, decompress) + self._decompressed = decompress + + def get_decompressed_data(self) -> bytes: + if self.decompressed: + return self.data + else: + # zlib_header = Struct("2B").unpack(self.data[:2]) + # full_zlib_header = (zlib_header[0] & 0xF0) >> 4, zlib_header[0] & 0xF, \ + # (zlib_header[1] & 0b11000000) >> 6, (zlib_header[1] >> 5) & 0b1, zlib_header[1] & 0b11111 + # convert = {7: 32, 6: 16} + # assert convert[full_zlib_header[0]] == self.header.compression_flag.value + return zlib.decompress(self.data) + + def decompress(self): + self.data = self.get_decompressed_data() + self._decompressed = True + + +@dataclass +class FileHeaderABC: + LAYOUT: ClassVar[Struct] + name_sub_ptr: Ptr # Sub ptr is expected to be used via window (E.G. 'WindowPtr() as handle', then, 'data_sub_ptr.stream_jump_to(handle)') + data_sub_ptr: Ptr + decompressed_size: int + compressed_size: int + + def __eq__(self, other): + # TODO ptr equality + return self.decompressed_size == other.decompressed_size and self.compressed_size == other.compressed_size + + @property + def compressed(self): + raise NotImplementedError + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeaderABC: + raise NotImplementedError + + def pack(self, stream: BinaryIO) -> int: + raise NotImplementedError + + @classmethod + def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> FileHeaderABC: + raise TypeError("Use APIs[version].FileHeader.unpack(stream)") + # _VERSION_MAP = None # TODO move to IO + # header_class = _FILE_HEADER_VERSION_MAP.get(version) + # + # if not header_class: + # raise NotImplementedError(version) + # + # return header_class.old_unpack(stream) + + +@dataclass +class FolderCollectionABC(FolderCollection): + sub_folders: List[Folder] + + +@dataclass +class FileCollectionABC(FileCollection): + files: List[File] + + +@dataclass +class FolderChild: + parent_folder: Optional[Folder] + + +@dataclass +class DriveChild: + parent_drive: Optional[VirtualDrive] + + +@dataclass +class FolderABC(Folder, FolderCollectionABC, FileCollectionABC, FolderChild, DriveChild): + header: FolderHeaderABC + name: str + + def __init__(self, header: FolderHeaderABC, name: str, sub_folders: List[FolderABC], files: List[FileABC], parent_folder: Optional[FolderABC] = None, drive: Optional[VirtualDriveABC] = None): + self.header = header + self.name = name + self.sub_folders = sub_folders + self.files = files + self.parent_drive = drive + self.parent_folder = parent_folder + + @property + def full_path(self) -> PurePosixPath: + if self.parent_drive: + return self.parent_drive.full_path / self.name + else: + return PurePosixPath(self.name) + + def walk(self) -> ArchiveWalk: + return walk(self) + + @classmethod + def create(cls, header: FolderHeaderABC) -> FolderABC: + name = None + folders = [None] * header.sub_folder_range.size + files = [None] * header.file_range.size + # noinspection PyTypeChecker + return FolderABC(header, name, folders, files) + + def load_toc(self, toc: ArchiveTableOfContentsABC): + self.load_folders(toc.folders) + self.load_files(toc.files) + 
self.load_name_from_lookup(toc.names) + + def load_name_from_lookup(self, name_lookup: Dict[int, str]): + self.name = name_lookup[self.header.name_offset] + + def load_folders(self, folders: List[FolderABC]): + if self.header.sub_folder_range.start < len(folders): + for folder_index in self.header.sub_folder_range: + sub_folder_index = folder_index - self.header.sub_folder_range.start + f = self.sub_folders[sub_folder_index] = folders[folder_index] + f.parent_folder = self + + def load_files(self, files: List[FileABC]): + if self.header.file_range.start < len(files): + for file_index in self.header.file_range: + sub_file_index = file_index - self.header.file_range.start + f = self.files[sub_file_index] = files[file_index] + f.parent_folder = self + + +@dataclass +class FolderHeaderABC: + LAYOUT: ClassVar[Struct] + + name_offset: int + sub_folder_range: ArchiveRange + file_range: ArchiveRange + + @classmethod + def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> 'FolderHeaderABC': + raise TypeError("Use APIs[version].FolderHeader.unpack(stream)") + # header_class = _FOLDER_HEADER_VERSION_MAP.get(version) + # + # if not header_class: + # raise NotImplementedError(version) + # + # return header_class.unpack(stream) + + def pack(self, stream: BinaryIO) -> int: + args = self.name_offset, self.sub_folder_range.start, self.sub_folder_range.end, \ + self.file_range.start, self.file_range.end + return self.LAYOUT.pack_stream(stream, *args) + + @classmethod + def unpack(cls, stream: BinaryIO) -> 'FolderHeaderABC': + name_offset, sub_folder_start, sub_folder_end, file_start, file_end = cls.LAYOUT.unpack_stream(stream) + sub_folder_range = ArchiveRange(sub_folder_start, sub_folder_end) + file_range = ArchiveRange(file_start, file_end) + return cls(name_offset, sub_folder_range, file_range) + + +@dataclass +class VirtualDriveHeaderABC: + LAYOUT: ClassVar[Struct] + + path: str + name: str + + sub_folder_range: ArchiveRange + file_range: ArchiveRange + unk: bytes + + @classmethod + def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> 'VirtualDriveHeaderABC': + raise TypeError("Use APIs[version].VirtualDriveHeader.unpack(stream)") + # header_class = _VIRTUAL_DRIVE_HEADER_VERSION_MAP.get(version) + # + # if not header_class: + # raise NotImplementedError(version) + # + # return header_class.unpack(stream) + + def pack(self, stream: BinaryIO) -> int: + args = self.path.encode("ascii"), self.name.encode("ascii"), self.sub_folder_range.start, self.sub_folder_range.end, \ + self.file_range.start, self.file_range.end, 0 + return self.LAYOUT.pack_stream(stream, *args) + + @classmethod + def unpack(cls, stream: BinaryIO) -> 'VirtualDriveHeaderABC': + path, name, sub_folder_start, sub_folder_end, file_start, file_end, unk = cls.LAYOUT.unpack_stream(stream) + path, name = path.decode("ascii").rstrip("\00"), name.decode("ascii").rstrip("\00") + sub_folder_range = ArchiveRange(sub_folder_start, sub_folder_end) + file_range = ArchiveRange(file_start, file_end) + return cls(path, name, sub_folder_range, file_range, unk) + + +@dataclass +class VirtualDriveABC(FolderCollectionABC, FileCollectionABC): + header: VirtualDriveHeaderABC + + def __init__(self, header: VirtualDriveHeaderABC, sub_folders: List[FolderABC], files: List[FileABC]): + self.header = header + self.sub_folders = sub_folders + self.files = files + + @property + def path(self) -> str: + return self.header.path + + @property + def name(self) -> str: + return self.header.name + + def walk(self) -> ArchiveWalk: + return walk(self) + + 
@property + def full_path(self) -> PurePosixPath: + return PurePosixPath(self.path + ":") + + @classmethod + def create(cls, header: VirtualDriveHeaderABC) -> VirtualDriveABC: + folders = [None] * header.sub_folder_range.size + files = [None] * header.file_range.size + # noinspection PyTypeChecker + return VirtualDriveABC(header, folders, files) + + def load_toc(self, toc: ArchiveTableOfContentsABC): + self.load_folders(toc.folders) + self.load_files(toc.files) + + def load_folders(self, folders: List[FolderABC]): + if self.header.sub_folder_range.start < len(folders): + for folder_index in self.header.sub_folder_range: + sub_folder_index = folder_index - self.header.sub_folder_range.start + f = self.sub_folders[sub_folder_index] = folders[folder_index] + f.parent_drive = self + + def load_files(self, files: List[FileABC]): + if self.header.file_range.start < len(files): + for file_index in self.header.file_range: + sub_file_index = file_index - self.header.file_range.start + f = self.files[sub_file_index] = files[file_index] + f.parent_drive = self + + def build_tree(self): + self.sub_folders = [f for f in self.sub_folders if not f.parent_folder] + self.files = [f for f in self.files if not f.parent_folder] + + +ArchiveTOC = ArchiveTableOfContentsABC +ArchiveToCPtrABC = ArchiveTableOfContentsPtrABC + + +@dataclass +class DriveCollection: + drives: List[VirtualDrive] diff --git a/src/relic/sga/archive/__init__.py b/src/relic/sga/archive/__init__.py deleted file mode 100644 index 995f3a1..0000000 --- a/src/relic/sga/archive/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from .archive import Archive, DowIArchive, DowIIArchive, DowIIIArchive -from .header import ArchiveHeader, ArchiveVersion, DowIArchiveHeader, DowIIArchiveHeader, DowIIIArchiveHeader, ArchiveMagicWord - -__all__ = [ - "Archive", - "DowIArchive", - "DowIIArchive", - "DowIIIArchive", - "ArchiveHeader", - "ArchiveVersion", - "DowIArchiveHeader", - "DowIIArchiveHeader", - "DowIIIArchiveHeader", - - "ArchiveMagicWord", -] diff --git a/src/relic/sga/archive/archive.py b/src/relic/sga/archive/archive.py deleted file mode 100644 index 42fba30..0000000 --- a/src/relic/sga/archive/archive.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import BinaryIO, List, Type, Dict, TYPE_CHECKING - -from .header import ArchiveHeader -from ..common import ArchiveVersion -from ..hierarchy import DriveCollection, ArchiveWalk, walk -from ...common import VersionLike - -if TYPE_CHECKING: - from ..toc.toc import ArchiveTableOfContents - from ..toc.toc_headers import ArchiveTableOfContentsHeaders - from ..toc.toc_ptr import ArchiveTableOfContentsPtr - from ..vdrive.virtual_drive import VirtualDrive - - -@dataclass -class Archive(DriveCollection): - header: ArchiveHeader - """Sparse represents whether data was loaded on creation.""" - _sparse: bool - - def __init__(self, header: ArchiveHeader, drives: List[VirtualDrive], _sparse: bool): - self.header = header - self._sparse = _sparse - self.drives = drives - - def walk(self) -> ArchiveWalk: - return walk(self) - - @classmethod - def _unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): - from ..toc import ArchiveTableOfContents, ArchiveTableOfContentsPtr, ArchiveTableOfContentsHeaders - version = header.version - with header.toc_ptr.stream_jump_to(stream) as handle: - toc_ptr = ArchiveTableOfContentsPtr.unpack_version(handle, version) - toc_headers = ArchiveTableOfContentsHeaders.unpack(handle, toc_ptr, version) - 
toc = ArchiveTableOfContents.create(toc_headers) - - toc.load_toc() - toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up - if not sparse: - with header.data_ptr.stream_jump_to(stream) as handle: - toc.load_data(handle) - - return cls(header, toc.drives, sparse) - - @classmethod - def unpack(cls, stream: BinaryIO, read_magic: bool = True, sparse: bool = True, *, validate: bool = True) -> Archive: - header = ArchiveHeader.unpack(stream, read_magic) - if validate: - header.validate_checksums(stream) - class_type = _VERSION_MAP[header.version] - return class_type._unpack(stream, header, sparse) # Defer to subclass (ensures packing works as expected) - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -@dataclass(init=False) -class DowIArchive(Archive): - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -@dataclass(init=False) -class DowIIArchive(Archive): - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -@dataclass(init=False) -class DowIIIArchive(Archive): - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -_VERSION_MAP: Dict[VersionLike, Type[Archive]] = { - ArchiveVersion.Dow: DowIArchive, - ArchiveVersion.Dow2: DowIIArchive, - ArchiveVersion.Dow3: DowIIIArchive -} diff --git a/src/relic/sga/archive/header.py b/src/relic/sga/archive/header.py deleted file mode 100644 index fff58ec..0000000 --- a/src/relic/sga/archive/header.py +++ /dev/null @@ -1,246 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from hashlib import md5 -from typing import BinaryIO, Dict, Type, Tuple - -from serialization_tools.ioutil import WindowPtr, Ptr, iter_read, StreamPtr -from serialization_tools.magic import MagicWordIO -from serialization_tools.size import KiB -from serialization_tools.structx import Struct - -from ..common import ArchiveVersion -from ...common import VersionLike - -ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) - -_NAME_CHAR_COUNT = 64 # 64 characters max -_NAME_CHAR_SIZE = 2 # UTF-16-le ~ 2 bytes per character -_NAME_BYTE_SIZE = _NAME_CHAR_COUNT * _NAME_CHAR_SIZE - - -@dataclass -class ArchiveHeader: - name: str - - toc_ptr: Ptr - data_ptr: WindowPtr - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool: - """ - Validates header checksums against the content's of the stream. - - The stream should return to its original position when it was passed in. 
- - :param stream: The binary stream to read from - :param fast: When true, slow checksums may be skipped - :param _assert: When true, an assertion is raised instead of returning False - :returns: True if all checksums match (or the type does not have checksums to validate) - :raises AssertionError: if a checksum does not match and _assert is True - """ - raise NotImplementedError - - @property - def version(self) -> VersionLike: - raise NotImplementedError - - @classmethod - def _unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': - raise NotImplementedError - - def _pack(self, stream: BinaryIO) -> int: - raise NotImplementedError - - @classmethod - def unpack(cls, stream: BinaryIO, read_magic: bool = True) -> 'ArchiveHeader': - # TODO move read_magic and unpack out of unpack - if read_magic: - ArchiveMagicWord.assert_magic_word(stream, True) - - version = ArchiveVersion.unpack_version(stream) - header_class = _HEADER_VERSION_MAP.get(version) - - if not header_class: - raise NotImplementedError(version) - - return header_class._unpack(stream) - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - written = 0 - - if write_magic: - written += ArchiveMagicWord.write_magic_word(stream) - - written += ArchiveVersion.pack_version(stream, self.version) - written += self._pack(stream) - return written - - -def _gen_md5_checksum(stream: BinaryIO, eigen: bytes, buffer_size: int = 64 * KiB, ptr: Ptr = None) -> bytes: - hasher = md5(eigen) if eigen else md5() - ptr = ptr or StreamPtr(stream) # Quick way to preserve stream integrity - with ptr.stream_jump_to(stream) as handle: - for buffer in iter_read(handle, buffer_size): - hasher.update(buffer) - return bytes.fromhex(hasher.hexdigest()) - - -def _validate_md5_checksum(stream: BinaryIO, ptr: WindowPtr, eigen: bytes, expected: bytes, buffer_size: int = 1024 * 64, _assert: bool = True) -> bool: - result = _gen_md5_checksum(stream, eigen, buffer_size, ptr=ptr) - if _assert: - assert expected == result, (expected, result) - return True - else: - return expected == result - - -@dataclass -class DowIArchiveHeader(ArchiveHeader): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET - LAYOUT = Struct(f"< 16s {_NAME_BYTE_SIZE}s 16s 2L") - # The eigen value is a guid? 
also knew that layout looked familiar - MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= _validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - @property - def version(self) -> VersionLike: - return ArchiveVersion.Dow - - @classmethod - def _unpack(cls, stream: BinaryIO) -> 'DowIArchiveHeader': - csum_a, name, csum_b, toc_size, data_offset = cls.LAYOUT.unpack_stream(stream) - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=stream.tell(), size=toc_size) - - data_ptr = WindowPtr(offset=data_offset, size=None) - - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b)) - - def _pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset - return self.LAYOUT.pack_stream(stream, *args) - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - - -@dataclass -class DowIIArchiveHeader(ArchiveHeader): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET, TOC_POS, RESERVED:1, RESERVED:0?, UNK??? - LAYOUT = Struct(f"< 16s {_NAME_BYTE_SIZE}s 16s 3L 3L") - # Copied from DowI, may be different; praying it isn't - # UGH THIER DIFFERENT! 
Or the way to calculate them is different - # First, let's try no eigen # (None, None) # HAH TROLLED MYSELF, forgot to conert checksum to hex - MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - unk: int - - # This may not mirror DowI one-to-one, until it's verified, it stays here - # noinspection DuplicatedCode - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - # return True - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= _validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - - @property - def version(self) -> VersionLike: - return ArchiveVersion.Dow2 - - @classmethod - def _unpack(cls, stream: BinaryIO) -> 'DowIIArchiveHeader': - csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream) - - assert rsv_1 == 1 - assert rsv_0 == 0 - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_offset) - - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk) - - def _pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk - return self.LAYOUT.pack_stream(stream, *args) - - -@dataclass -class DowIIIArchiveHeader(ArchiveHeader): - # name, TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK??? - LAYOUT = Struct(f"<{_NAME_BYTE_SIZE}s Q L Q L 3L 256s") - toc_ptr: WindowPtr - data_ptr: WindowPtr - - unk: bytes - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool: - """ - Dawn of War III does not contain any checksums, and so will always return true. 
- - :param stream: Ignored - :param fast: Ignored - :param _assert: Ignored - :returns: True - """ - return True - - @property - def version(self) -> VersionLike: - return ArchiveVersion.Dow3 - - @classmethod - def _unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, toc_pos, toc_size, data_pos, data_size, rsv_0_a, rsv_1, rsv_0_b, unk = cls.LAYOUT.unpack_stream(stream) - - assert rsv_1 == 1 - assert rsv_0_a == 0 - assert rsv_0_b == 0 - - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_pos, size=data_size) - name = name.decode("utf-16-le").rstrip("\0") - - return cls(name, toc_ptr, data_ptr, unk) - - def _pack(self, stream: BinaryIO) -> int: - args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 0, 1, 0, self.unk - return self.LAYOUT.pack_stream(stream, *args) - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version - -_HEADER_VERSION_MAP: Dict[VersionLike, Type[ArchiveHeader]] = { - ArchiveVersion.Dow: DowIArchiveHeader, - ArchiveVersion.Dow2: DowIIArchiveHeader, - ArchiveVersion.Dow3: DowIIIArchiveHeader -} diff --git a/src/relic/sga/checksums.py b/src/relic/sga/checksums.py new file mode 100644 index 0000000..22b9db9 --- /dev/null +++ b/src/relic/sga/checksums.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from hashlib import md5 +from typing import BinaryIO + +from serialization_tools.ioutil import Ptr, StreamPtr, iter_read, WindowPtr +from serialization_tools.size import KiB + + +def gen_md5_checksum(stream: BinaryIO, eigen: bytes, buffer_size: int = 64 * KiB, ptr: Ptr = None) -> bytes: + hasher = md5(eigen) if eigen else md5() + ptr = ptr or StreamPtr(stream) # Quick way to preserve stream integrity + with ptr.stream_jump_to(stream) as handle: + for buffer in iter_read(handle, buffer_size): + hasher.update(buffer) + return bytes.fromhex(hasher.hexdigest()) + + +def validate_md5_checksum(stream: BinaryIO, ptr: WindowPtr, eigen: bytes, expected: bytes, buffer_size: int = KiB * 64, _assert: bool = True) -> bool: + result = gen_md5_checksum(stream, eigen, buffer_size, ptr=ptr) + if _assert: + assert expected == result, (expected, result) + return True + else: + return expected == result diff --git a/src/relic/sga/common.py b/src/relic/sga/common.py index c864708..e9b1fb8 100644 --- a/src/relic/sga/common.py +++ b/src/relic/sga/common.py @@ -3,18 +3,22 @@ from dataclasses import dataclass from typing import Optional, Iterator, BinaryIO +from serialization_tools.magic import MagicWordIO from serialization_tools.structx import Struct -from ..common import VersionEnum, Version, VersionLike +from relic.common import VersionEnum, Version, VersionLike ArchiveVersionLayout = Struct("< 2H") class ArchiveVersion(VersionEnum): Unsupported = None - Dow = Version(2) - Dow2 = Version(5) - Dow3 = Version(9) + v2 = Version(2) + Dow = v2 + v5 = Version(5) + Dow2 = v5 + v9 = Version(9) + Dow3 = v9 @classmethod def unpack_version(cls, stream: BinaryIO) -> Version: @@ -51,3 +55,6 @@ def __iter__(self) -> ArchiveRange: def __next__(self) -> int: return next(self.__iterable) + + +ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) \ No newline at end of 
file diff --git a/src/relic/sga/file/__init__.py b/src/relic/sga/file/__init__.py deleted file mode 100644 index 899da3b..0000000 --- a/src/relic/sga/file/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -from .file import File -from .header import FileHeader, DowIFileHeader, DowIIFileHeader, DowIIIFileHeader, FileCompressionFlag - -__all__ = [ - "File", - "FileHeader", - "FileCompressionFlag", - "DowIFileHeader", - "DowIIFileHeader", - "DowIIIFileHeader", -] diff --git a/src/relic/sga/file/file.py b/src/relic/sga/file/file.py deleted file mode 100644 index 83effdf..0000000 --- a/src/relic/sga/file/file.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations - -import zlib -from dataclasses import dataclass -from pathlib import PurePosixPath -from typing import BinaryIO, Dict, Optional, TYPE_CHECKING - -from .header import FileHeader -if TYPE_CHECKING: - from ..folder.folder import Folder - from ..toc.toc import ArchiveTableOfContents - from ..vdrive.virtual_drive import VirtualDrive - - -@dataclass -class File: - header: FileHeader - name: str - data: Optional[bytes] = None - _decompressed: bool = False - _parent: Optional[Folder] = None - _drive: Optional[VirtualDrive] = None - - @property - def data_loaded(self) -> bool: - return self.data is not None - - @property - def expects_decompress(self) -> bool: - return self.header.compressed - - @property - def decompressed(self) -> bool: - if self.data_loaded: - return self._decompressed or not self.expects_decompress - else: - return False - - @property - def full_path(self) -> PurePosixPath: - if self._parent: - return self._parent.full_path / self.name - elif self._drive: - return self._drive.full_path / self.name - else: - return PurePosixPath(self.name) - - @classmethod - def create(cls, header: FileHeader) -> File: - _decompressed = False - # noinspection PyTypeChecker - return File(header, None, None, _decompressed) - - def load_name_from_lookup(self, name_lookup: Dict[int, str]): - self.name = name_lookup[self.header.name_sub_ptr.offset] - - def load_toc(self, toc: ArchiveTableOfContents): - self.load_name_from_lookup(toc.names) - - def read_data(self, stream: BinaryIO, decompress: bool = False) -> bytes: - with self.header.data_sub_ptr.stream_jump_to(stream) as handle: - buffer = handle.read(self.header.compressed_size) - if decompress and self.expects_decompress: - return zlib.decompress(buffer) - else: - return buffer - - def load_data(self, stream: BinaryIO, decompress: bool = False): - self.data = self.read_data(stream, decompress) - self._decompressed = decompress - - def get_decompressed_data(self) -> bytes: - if self.decompressed: - return self.data - else: - # zlib_header = Struct("2B").unpack(self.data[:2]) - # full_zlib_header = (zlib_header[0] & 0xF0) >> 4, zlib_header[0] & 0xF, \ - # (zlib_header[1] & 0b11000000) >> 6, (zlib_header[1] >> 5) & 0b1, zlib_header[1] & 0b11111 - # convert = {7: 32, 6: 16} - # assert convert[full_zlib_header[0]] == self.header.compression_flag.value - return zlib.decompress(self.data) - - def decompress(self): - self.data = self.get_decompressed_data() - self._decompressed = True diff --git a/src/relic/sga/file/header.py b/src/relic/sga/file/header.py deleted file mode 100644 index 61ddf0a..0000000 --- a/src/relic/sga/file/header.py +++ /dev/null @@ -1,151 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import BinaryIO, ClassVar, Type, Dict - -from serialization_tools.ioutil import Ptr, WindowPtr -from 
serialization_tools.structx import Struct - -from ..common import ArchiveVersion -from ...common import VersionLike - - -class FileCompressionFlag(Enum): - # Compression flag is either 0 (Decompressed) or 16/32 which are both compressed - # Aside from 0; these appear to be the Window-Sizes for the Zlib Compression (In KibiBytes) - Decompressed = 0 - - Compressed16 = 16 - Compressed32 = 32 - - def compressed(self) -> bool: - return self != FileCompressionFlag.Decompressed - - -@dataclass -class FileHeader: - LAYOUT: ClassVar[Struct] - name_sub_ptr: Ptr # Sub ptr is expected to be used via window (E.G. 'WindowPtr() as handle', then, 'data_sub_ptr.stream_jump_to(handle)') - data_sub_ptr: Ptr - decompressed_size: int - compressed_size: int - - def __eq__(self, other): - # TODO ptr equality - return self.decompressed_size == other.decompressed_size and self.compressed_size == other.compressed_size - - @property - def compressed(self): - raise NotImplementedError - - @classmethod - def _unpack(cls, stream: BinaryIO) -> FileHeader: - raise NotImplementedError - - def _pack(self, stream: BinaryIO) -> int: - raise NotImplementedError - - def pack(self, stream: BinaryIO) -> int: - return self._pack(stream) - - @classmethod - def unpack(cls, stream: BinaryIO, version: VersionLike) -> FileHeader: - header_class = _HEADER_VERSION_MAP.get(version) - - if not header_class: - raise NotImplementedError(version) - - return header_class._unpack(stream) - - -@dataclass -class DowIFileHeader(FileHeader): - # name - LAYOUT = Struct(f"<5L") - compression_flag: FileCompressionFlag - - def __eq__(self, other): - return self.compression_flag == other.compression_flag and super().__eq__(other) - - - @classmethod - def _unpack(cls, stream: BinaryIO) -> DowIFileHeader: - name_offset, compression_flag_value, data_offset, decompressed_size, compressed_size = cls.LAYOUT.unpack_stream(stream) - compression_flag = FileCompressionFlag(compression_flag_value) - name_ptr = Ptr(name_offset) - data_ptr = WindowPtr(data_offset, compressed_size) - return cls(name_ptr, data_ptr, decompressed_size, compressed_size, compression_flag) - - def _pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.compression_flag.value, self.data_sub_ptr.offset, self.decompressed_size, self.compressed_size) - - @property - def compressed(self): - return self.compression_flag.compressed() - - -@dataclass -class DowIIFileHeader(FileHeader): - LAYOUT = Struct(f"<5L H") - unk_a: int - unk_b: int - - @property - def compressed(self): - return self.compressed_size < self.decompressed_size - - @classmethod - def _unpack(cls, stream: BinaryIO) -> DowIIFileHeader: - name_off, data_off, comp_size, decomp_size, unk_a, unk_b = cls.LAYOUT.unpack_stream(stream) - # Name, File, Compressed, Decompressed, ???, ??? - name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b) - - def _pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b) - - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) - -@dataclass -class DowIIIFileHeader(FileHeader): - LAYOUT = Struct("< 7L H L") - unk_a: int - unk_b: int - unk_c: int - unk_d: int # 256? 
- unk_e: int - - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and self.unk_c == other.unk_c and self.unk_d == other.unk_d and self.unk_e == other.unk_e and super().__eq__(other) - - @classmethod - def _unpack(cls, stream: BinaryIO) -> DowIIIFileHeader: - name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e = cls.LAYOUT.unpack_stream(stream) - # assert unk_a == 0, (unk_a, 0) - # assert unk_b == 0, (unk_b, 0) - # UNK_D is a new compression flag?! - # if comp_size != decomp_size: - # assert unk_d in [256,512], ((comp_size, decomp_size), (unk_d, [256,512]), (name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e)) - # Pulling stuff out of my ass; but dividing them by the max block size gets you 7, 6 respectively - # Name, File, Compressed, Decompressed, ???, ??? - name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d, unk_e) - - def _pack(self, stream: BinaryIO) -> int: - args = self.name_sub_ptr.offset, self.unk_a, self.data_sub_ptr.offset, self.unk_b, self.compressed_size, self.decompressed_size, self.unk_c, self.unk_d, self.unk_e - return self.LAYOUT.pack_stream(stream, *args) - - @property - def compressed(self): - return self.compressed_size < self.decompressed_size - - -_HEADER_VERSION_MAP: Dict[VersionLike, Type[FileHeader]] = { - ArchiveVersion.Dow: DowIFileHeader, - ArchiveVersion.Dow2: DowIIFileHeader, - ArchiveVersion.Dow3: DowIIIFileHeader -} diff --git a/src/relic/sga/folder/__init__.py b/src/relic/sga/folder/__init__.py deleted file mode 100644 index a7f1432..0000000 --- a/src/relic/sga/folder/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .folder import Folder -from .header import FolderHeader, DowIFolderHeader, DowIIFolderHeader, DowIIIFolderHeader - -__all__ = [ - "Folder", - "FolderHeader", - "DowIFolderHeader", - "DowIIFolderHeader", - "DowIIIFolderHeader" -] diff --git a/src/relic/sga/folder/folder.py b/src/relic/sga/folder/folder.py deleted file mode 100644 index 6322e3a..0000000 --- a/src/relic/sga/folder/folder.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import PurePosixPath -from typing import Dict, List, Optional, TYPE_CHECKING - -from ..hierarchy import DriveChild, FolderCollection, FileCollection, FolderChild, walk - -if TYPE_CHECKING: - from ..file.file import File - from ..toc.toc import ArchiveTableOfContents - from ..vdrive.virtual_drive import VirtualDrive - from .header import FolderHeader - from ..hierarchy import ArchiveWalk - - -@dataclass -class Folder(FolderCollection, FileCollection, FolderChild, DriveChild): - header: FolderHeader - name: str - - def __init__(self, header: FolderHeader, name: str, sub_folders: List[Folder], files: List[File], parent_folder: Optional[Folder] = None, drive: Optional[VirtualDrive] = None): - self.header = header - self.name = name - self.sub_folders = sub_folders - self.files = files - self._drive = drive - self._parent = parent_folder - - @property - def full_path(self) -> PurePosixPath: - if self._drive: - return self._drive.full_path / self.name - else: - return PurePosixPath(self.name) - - def walk(self) -> ArchiveWalk: - return walk(self) - - @classmethod - def create(cls, header: FolderHeader) -> Folder: - name = None - folders = [None] * header.sub_folder_range.size - files = [None] * header.file_range.size - # noinspection PyTypeChecker - return Folder(header, name, 
folders, files) - - def load_toc(self, toc: ArchiveTableOfContents): - self.load_folders(toc.folders) - self.load_files(toc.files) - self.load_name_from_lookup(toc.names) - - def load_name_from_lookup(self, name_lookup: Dict[int, str]): - self.name = name_lookup[self.header.name_offset] - - def load_folders(self, folders: List[Folder]): - if self.header.sub_folder_range.start < len(folders): - for folder_index in self.header.sub_folder_range: - sub_folder_index = folder_index - self.header.sub_folder_range.start - f = self.sub_folders[sub_folder_index] = folders[folder_index] - f._parent = self - - def load_files(self, files: List[File]): - if self.header.file_range.start < len(files): - for file_index in self.header.file_range: - sub_file_index = file_index - self.header.file_range.start - f = self.files[sub_file_index] = files[file_index] - f._parent = self diff --git a/src/relic/sga/folder/header.py b/src/relic/sga/folder/header.py deleted file mode 100644 index 217e010..0000000 --- a/src/relic/sga/folder/header.py +++ /dev/null @@ -1,61 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import ClassVar, BinaryIO, Dict, Type - -from serialization_tools.structx import Struct - -from ...common import VersionLike -from ..common import ArchiveRange, ArchiveVersion - - -@dataclass -class FolderHeader: - LAYOUT: ClassVar[Struct] - - name_offset: int - sub_folder_range: ArchiveRange - file_range: ArchiveRange - - @classmethod - def unpack(cls, stream: BinaryIO, version: VersionLike) -> 'FolderHeader': - header_class = _HEADER_VERSION_MAP.get(version) - - if not header_class: - raise NotImplementedError(version) - - return header_class._unpack(stream) - - def _pack(self, stream: BinaryIO) -> int: - args = self.name_offset, self.sub_folder_range.start, self.sub_folder_range.end, \ - self.file_range.start, self.file_range.end - return self.LAYOUT.pack_stream(stream, *args) - - @classmethod - def _unpack(cls, stream: BinaryIO) -> 'FolderHeader': - name_offset, sub_folder_start, sub_folder_end, file_start, file_end = cls.LAYOUT.unpack_stream(stream) - sub_folder_range = ArchiveRange(sub_folder_start, sub_folder_end) - file_range = ArchiveRange(file_start, file_end) - return cls(name_offset, sub_folder_range, file_range) - - -@dataclass -class DowIFolderHeader(FolderHeader): - LAYOUT = Struct("< L 4H") - - -@dataclass -class DowIIFolderHeader(FolderHeader): - LAYOUT = Struct("< L 4H") - - -@dataclass -class DowIIIFolderHeader(FolderHeader): - LAYOUT = Struct("< 5L") - - -_HEADER_VERSION_MAP: Dict[VersionLike, Type[FolderHeader]] = { - ArchiveVersion.Dow: DowIFolderHeader, - ArchiveVersion.Dow2: DowIIFolderHeader, - ArchiveVersion.Dow3: DowIIIFolderHeader -} diff --git a/src/relic/sga/hierarchy.py b/src/relic/sga/hierarchy.py deleted file mode 100644 index a175c98..0000000 --- a/src/relic/sga/hierarchy.py +++ /dev/null @@ -1,77 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import PurePath -from typing import List, Optional, Union, Tuple, Iterable, TYPE_CHECKING - -if TYPE_CHECKING: - from .file import File - from .folder import Folder - from .vdrive import VirtualDrive - - -@dataclass -class DriveCollection: - drives: List[VirtualDrive] - - -@dataclass -class FolderCollection: - sub_folders: List[Folder] - - -@dataclass -class FileCollection: - files: List[File] - - -@dataclass -class FolderChild: - _parent: Optional[Folder] - - -@dataclass -class DriveChild: - _drive: Optional[VirtualDrive] - - -ArchivePath = 
PurePath
-
-if TYPE_CHECKING:
-    ArchiveWalk = Iterable[Tuple[Optional[VirtualDrive], Optional[Folder], Iterable[Folder], Iterable[File]]]
-else:
-    ArchiveWalk = Iterable[Tuple['VirtualDrive', Optional['Folder'], Iterable['Folder'], Iterable['File']]]
-
-
-def walk(collection: Union[DriveCollection, FolderCollection, FileCollection]) -> ArchiveWalk:
-    from .folder import Folder
-    from .vdrive import VirtualDrive
-
-    drives = collection.drives if isinstance(collection, DriveCollection) else []
-    sub_folders = collection.sub_folders if isinstance(collection, FolderCollection) else []
-    files = collection.files if isinstance(collection, FileCollection) and not isinstance(collection,VirtualDrive) else []
-
-    root_drive = collection if isinstance(collection, VirtualDrive) else None
-    root_folder = collection if isinstance(collection, Folder) else None
-
-    # TODO optimize
-    #   logically, we can only walk folder OR drive
-    if root_drive is None and root_folder is None and len(sub_folders) == 0 and len(files) == 0:
-        # I don't think we need to return ANYTHING if we won't be iterating over it
-        pass
-    # if len(drives) == 0:  # We will only yield this item, so we return this to always iterate over something
-    #     yield root_drive, root_folder, sub_folders, files
-    else:
-        yield root_drive, root_folder, sub_folders, files  # at least one of these isn't None/Empty so we yield iti
-
-    for drive in drives:
-        for d, f, folds, files, in walk(drive):
-            d = d or drive or root_drive
-            f = f or root_folder
-            yield d, f, folds, files
-
-    for folder in sub_folders:
-        for d, f, folds, files in walk(folder):
-            d = d or root_drive
-            f = f or folder or root_folder
-            yield d, f, folds, files
diff --git a/src/relic/sga/io.py b/src/relic/sga/io.py
new file mode 100644
index 0000000..558ade1
--- /dev/null
+++ b/src/relic/sga/io.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import Dict, Type, BinaryIO, Union, Optional
+
+from relic.common import VersionLike
+from relic.sga.vX import APIvX
+from relic.sga.abc_ import DriveCollection, FolderCollectionABC, FileCollectionABC, ArchiveABC
+from relic.sga.common import ArchiveMagicWord, ArchiveVersion
+from relic.sga.protocols import ArchiveHeader, Archive, ArchiveWalk, VirtualDrive, Folder
+
+
+def unpack_archive_header(versions: Dict[VersionLike, Type[ArchiveHeader]], stream: BinaryIO, read_magic: bool = True) -> ArchiveHeader:
+    if read_magic:
+        ArchiveMagicWord.assert_magic_word(stream, True)
+
+    version = ArchiveVersion.unpack_version(stream)
+    try:
+        header_class = versions[version]
+    except KeyError as e:
+        raise NotImplementedError(version) from e
+
+    return header_class.unpack(stream)
+
+
+def pack_archive_header(header: ArchiveHeader, stream: BinaryIO, write_magic: bool = True) -> int:
+    written = 0
+
+    if write_magic:
+        written += ArchiveMagicWord.write_magic_word(stream)
+
+    written += ArchiveVersion.pack_version(stream, header.version)
+    written += header.pack(stream)
+    return written
+
+
+def pack_archive(archive: Archive, stream: BinaryIO, write_magic: bool = True) -> int:
+    raise NotImplementedError
+
+
+def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Optional[Dict[VersionLike, APIvX]] = None, *, validate: bool = True) -> ArchiveABC:
+    ArchiveMagicWord.assert_magic_word(stream, True)
+    version = ArchiveVersion.unpack_version(stream)
+    if versions is None or version not in versions:
+        raise NotImplementedError(version)
+    api = versions[version]
+    header = api.ArchiveHeader.unpack(stream)
+    if validate:
+        header.validate_checksums(stream)
+    return api.Archive.unpack(stream, header, sparse)  # Defer to subclass (ensures unpacking works as expected)
+
+
+def walk(collection: Union[DriveCollection, FolderCollectionABC, FileCollectionABC]) -> ArchiveWalk:
+    drives = collection.drives if isinstance(collection, DriveCollection) else []
+    sub_folders = collection.sub_folders if isinstance(collection, FolderCollectionABC) else []
+    files = collection.files if isinstance(collection, FileCollectionABC) and not isinstance(collection, VirtualDrive) else []
+
+    root_drive = collection if isinstance(collection, VirtualDrive) else None
+    root_folder = collection if isinstance(collection, Folder) else None
+
+    # TODO optimize
+    #   logically, we can only walk folder OR drive
+    if root_drive is None and root_folder is None and len(sub_folders) == 0 and len(files) == 0:
+        # I don't think we need to return ANYTHING if we won't be iterating over it
+        pass
+    # if len(drives) == 0:  # We will only yield this item, so we return this to always iterate over something
+    #     yield root_drive, root_folder, sub_folders, files
+    else:
+        yield root_drive, root_folder, sub_folders, files  # at least one of these isn't None/Empty, so we yield it
+
+    for drive in drives:
+        for d, f, folds, files in walk(drive):
+            d = d or drive or root_drive
+            f = f or root_folder
+            yield d, f, folds, files
+
+    for folder in sub_folders:
+        for d, f, folds, files in walk(folder):
+            d = d or root_drive
+            f = f or folder or root_folder
+            yield d, f, folds, files
diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py
new file mode 100644
index 0000000..a228d1f
--- /dev/null
+++ b/src/relic/sga/protocols.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+from pathlib import PurePath
+from typing import BinaryIO, Protocol, runtime_checkable, List, Optional, Iterable, Tuple
+
+from serialization_tools.ioutil import Ptr, WindowPtr
+
+from relic.common import VersionLike
+
+
+@runtime_checkable
+class ArchiveHeader(Protocol):
+    name: str
+    toc_ptr: Ptr
+    data_ptr: WindowPtr
+
+    def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool:
+        """
+        Validates header checksums against the contents of the stream.
+
+        Implementations should restore the stream to the position it had when it was passed in.
+
+        :param stream: The binary stream to read from
+        :param fast: When true, slow checksums may be skipped
+        :param _assert: When true, an assertion is raised instead of returning False
+        :returns: True if all checksums match (or the type does not have checksums to validate)
+        :raises AssertionError: if a checksum does not match and _assert is True
+        """
+        raise NotImplementedError
+
+    @classmethod
+    @property
+    def version(cls) -> VersionLike:
+        raise NotImplementedError
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> ArchiveHeader:
+        raise NotImplementedError
+
+    def pack(self, stream: BinaryIO) -> int:
+        raise NotImplementedError
+
+
+class DriveCollection(Protocol):
+    drives: List[VirtualDrive]
+
+
+class FolderCollection(Protocol):
+    sub_folders: List[Folder]
+
+
+class FileCollection(Protocol):
+    files: List[File]
+
+
+class FolderChild(Protocol):
+    parent_folder: Optional[Folder]
+
+
+class DriveChild(Protocol):
+    parent_drive: Optional[VirtualDrive]
+
+
+class VirtualDrive(FolderCollection, FileCollection, Protocol):
+    ...
+
+
+class Folder(FolderCollection, FileCollection, FolderChild, DriveChild, Protocol):
+    ...
+
+
+class File(FolderChild, DriveChild, Protocol):
+    ...
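A minimal sketch of how these pieces are meant to be consumed together: build a version-to-API lookup table, hand it to unpack_archive, then iterate the result with walk. The `APIS` registry and the archive path below are hypothetical stand-ins; nothing in this patch pins down where the lookup table lives.

    from relic.sga import v2, v5, v9
    from relic.sga.io import unpack_archive, walk

    # Hypothetical registry: map each API's declared version classvar to the API class.
    APIS = {api.version: api for api in (v2.APIv2, v5.APIv5, v9.APIv9)}

    with open("example.sga", "rb") as stream:  # hypothetical archive path
        archive = unpack_archive(stream, sparse=True, versions=APIS, validate=True)
        for drive, folder, sub_folders, files in walk(archive):
            for file in files:
                ...  # with sparse=True, file data has not been loaded yet

Because each APIvX object carries every versioned type, swapping version contexts is a dictionary entry rather than a per-class registration.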
+ + +class Archive(DriveCollection, Protocol): + header: ArchiveHeader + """Sparse represents whether data was loaded on creation.""" + _sparse: bool + + def walk(self) -> ArchiveWalk: + raise NotImplementedError + # return walk(self) + + @classmethod + def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): + raise NotImplementedError + # version = header.version + # with header.toc_ptr.stream_jump_to(stream) as handle: + # toc_ptr = ArchiveTableOfContentsPtrABC.unpack_version(handle, version) + # toc_headers = ArchiveTableOfContentsHeadersABC.unpack(handle, toc_ptr, version) + # toc = ArchiveTableOfContentsABC.create(toc_headers) + # + # toc.load_toc() + # toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up + # if not sparse: + # with header.data_ptr.stream_jump_to(stream) as handle: + # toc.load_data(handle) + + # return cls(header, toc.drives, sparse) + + def pack(self, stream: BinaryIO) -> int: + raise NotImplementedError + + +ArchiveWalk = Iterable[Tuple[Optional[VirtualDrive], Optional[Folder], Iterable[Folder], Iterable[File]]] +ArchivePath = PurePath diff --git a/src/relic/sga/toc/__init__.py b/src/relic/sga/toc/__init__.py deleted file mode 100644 index 26ffa98..0000000 --- a/src/relic/sga/toc/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from .toc import ArchiveTableOfContents -from .toc_headers import ArchiveTableOfContentsHeaders -from .toc_ptr import ArchiveTableOfContentsPtr, TocItemPtr, DowIArchiveToCPtr, DowIIArchiveToCPtr, DowIIIArchiveToCPtr - -__all__ = [ - "ArchiveTableOfContentsHeaders", - "ArchiveTableOfContentsPtr", - "ArchiveTableOfContents", - "TocItemPtr", - "DowIArchiveToCPtr", - "DowIIArchiveToCPtr", - "DowIIIArchiveToCPtr", -] diff --git a/src/relic/sga/toc/toc.py b/src/relic/sga/toc/toc.py deleted file mode 100644 index c51cbde..0000000 --- a/src/relic/sga/toc/toc.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import List, Dict, BinaryIO, TYPE_CHECKING - -from .toc_headers import ArchiveTableOfContentsHeaders - -if TYPE_CHECKING: - from ..file.file import File - from ..folder.folder import Folder - from ..vdrive.virtual_drive import VirtualDrive - - -@dataclass -class ArchiveTableOfContents: - drives: List[VirtualDrive] - folders: List[Folder] - files: List[File] - names: Dict[int, str] - - @classmethod - def create(cls, toc_headers: ArchiveTableOfContentsHeaders) -> ArchiveTableOfContents: - from ..vdrive.virtual_drive import VirtualDrive - from ..file.file import File - from ..folder.folder import Folder - - drives = [VirtualDrive.create(header) for header in toc_headers.drives] - folders = [Folder.create(header) for header in toc_headers.folders] - files = [File.create(header) for header in toc_headers.files] - - return ArchiveTableOfContents(drives, folders, files, toc_headers.names) - - def load_data(self, stream: BinaryIO): - for _ in self.files: - _.load_data(stream) - - def load_toc(self): - for _ in self.drives: - _.load_toc(self) - for _ in self.folders: - _.load_toc(self) - for _ in self.files: - _.load_toc(self) - - def build_tree(self): - for _ in self.drives: - _.build_tree() - - -ArchiveTOC = ArchiveTableOfContents diff --git a/src/relic/sga/toc/toc_headers.py b/src/relic/sga/toc/toc_headers.py deleted file mode 100644 index e0e1148..0000000 --- a/src/relic/sga/toc/toc_headers.py +++ /dev/null @@ -1,62 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import List, Dict, BinaryIO - 
-from ..file.header import FileHeader -from ..folder.header import FolderHeader -from .toc_ptr import ArchiveTableOfContentsPtr -from ..vdrive.header import VirtualDriveHeader -from ...common import VersionLike - -_NULL = "\0".encode("ascii") -_KIBI = 1024 -_BUFFER_SIZE = 64 * _KIBI - - -@dataclass -class ArchiveTableOfContentsHeaders: - drives: List[VirtualDriveHeader] - folders: List[FolderHeader] - files: List[FileHeader] - names: Dict[int, str] - - @classmethod - def unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtr, version: VersionLike = None) -> ArchiveTableOfContentsHeaders: - version = version or ptr.version # abusing the fact that the classes know their own version to avoid explicitly passing it in - - local_ptr = ptr.virtual_drive_ptr - with local_ptr.stream_jump_to(stream) as handle: - virtual_drives = [VirtualDriveHeader.unpack(handle, version) for _ in range(local_ptr.count)] - - local_ptr = ptr.folder_ptr - with local_ptr.stream_jump_to(stream) as handle: - folders = [FolderHeader.unpack(handle, version) for _ in range(local_ptr.count)] - - local_ptr = ptr.file_ptr - with local_ptr.stream_jump_to(stream) as handle: - files = [FileHeader.unpack(handle, version) for _ in range(local_ptr.count)] - - # This gets a bit wierd - local_ptr = ptr.name_ptr - names: Dict[int, str] = {} - with local_ptr.stream_jump_to(stream) as handle: - start = stream.tell() # use stream to avoid invalidating window - while len(names) < local_ptr.count: - remaining = local_ptr.count - len(names) - current = stream.tell() # Get relative pos to start - buffer = handle.read(_BUFFER_SIZE) - terminal_null = buffer.endswith(_NULL) - parts = buffer.split(_NULL, remaining) - - offset = 0 - for i, p in enumerate(parts): - if i == len(parts) - 1: - break - names[current - start + offset] = p.decode("ascii") - offset += len(p) + 1 # +1 to include null terminal - - if not terminal_null: - stream.seek(current + offset) - - return ArchiveTableOfContentsHeaders(virtual_drives, folders, files, names) diff --git a/src/relic/sga/toc/toc_ptr.py b/src/relic/sga/toc/toc_ptr.py deleted file mode 100644 index 0f3f839..0000000 --- a/src/relic/sga/toc/toc_ptr.py +++ /dev/null @@ -1,108 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import BinaryIO, Dict, Type, ClassVar, Tuple - -from serialization_tools.ioutil import Ptr -from serialization_tools.structx import Struct - -from ...common import VersionLike -from ..archive import ArchiveVersion - - -@dataclass -class TocItemPtr(Ptr): - def __init__(self, offset: int, count: int, whence: int = 0): - super().__init__(offset, whence) - self.count = count - - -@dataclass -class ArchiveTableOfContentsPtr: - # Virtual Drives (offset, count), Folder (offset, count), File (offset, count), Names (offset, count) - LAYOUT: ClassVar[Struct] - virtual_drive_ptr: TocItemPtr - folder_ptr: TocItemPtr - file_ptr: TocItemPtr - name_ptr: TocItemPtr - - @property - def version(self) -> ArchiveVersion: - raise NotImplementedError - - @classmethod - def _unpack_tuple(cls, stream: BinaryIO) -> Tuple[TocItemPtr, TocItemPtr, TocItemPtr, TocItemPtr]: - vd_offset, vd_count, fold_offset, fold_count, file_offset, file_count, name_offset, name_count = cls.LAYOUT.unpack_stream(stream) - vd_ptr = TocItemPtr(vd_offset, vd_count) - fold_ptr = TocItemPtr(fold_offset, fold_count) - file_ptr = TocItemPtr(file_offset, file_count) - name_ptr = TocItemPtr(name_offset, name_count) - return vd_ptr, fold_ptr, file_ptr, name_ptr - - def _pack_tuple(self) -> 
Tuple[int, int, int, int, int, int, int, int]: - return self.virtual_drive_ptr.offset, self.virtual_drive_ptr.count, \ - self.folder_ptr.offset, self.folder_ptr.count, \ - self.file_ptr.offset, self.file_ptr.count, \ - self.name_ptr.offset, self.name_ptr.count - - @classmethod - def unpack_version(cls, stream: BinaryIO, version: VersionLike) -> 'ArchiveTableOfContentsPtr': - toc_ptr_class = _ToCPtr_VERSION_MAP.get(version) - - if not toc_ptr_class: - raise NotImplementedError(version) - - return toc_ptr_class.unpack(stream) - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'ArchiveTableOfContentsPtr': - args = cls._unpack_tuple(stream) - return cls(*args) - - def pack(self, stream: BinaryIO) -> int: - args = self._pack_tuple() - return self.LAYOUT.pack_stream(stream, *args) - - def __str__(self): - parts = [f"{k}={v}" for k,v in self.__dict__.items()] - return f"{self.__class__.__name__}({', '.join(parts)})" - - def __repr__(self): - return str(self) - -# Alias -ArchiveToCPtr = ArchiveTableOfContentsPtr - - -@dataclass -class DowIArchiveToCPtr(ArchiveToCPtr): - @property - def version(self) -> ArchiveVersion: - return ArchiveVersion.Dow - - LAYOUT = Struct("< LH LH LH LH") - - -@dataclass -class DowIIArchiveToCPtr(ArchiveToCPtr): - LAYOUT = DowIArchiveToCPtr.LAYOUT - - @property - def version(self) -> ArchiveVersion: - return ArchiveVersion.Dow2 - - -@dataclass -class DowIIIArchiveToCPtr(ArchiveToCPtr): - LAYOUT = Struct("< 8L") - - @property - def version(self) -> ArchiveVersion: - return ArchiveVersion.Dow3 - - -_ToCPtr_VERSION_MAP: Dict[VersionLike, Type[ArchiveToCPtr]] = { - ArchiveVersion.Dow: DowIArchiveToCPtr, - ArchiveVersion.Dow2: DowIIArchiveToCPtr, - ArchiveVersion.Dow3: DowIIIArchiveToCPtr -} diff --git a/src/relic/sga/v2.py b/src/relic/sga/v2.py new file mode 100644 index 0000000..35f5110 --- /dev/null +++ b/src/relic/sga/v2.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import BinaryIO, Tuple, Type, ClassVar + +from serialization_tools.ioutil import WindowPtr, Ptr +from serialization_tools.structx import Struct + +from relic.common import VersionLike +from relic.sga import abc_ +from relic.sga.abc_ import ArchiveHeaderABC, ArchiveABC, FileHeaderABC, FolderHeaderABC, VirtualDriveHeaderABC, ArchiveToCPtrABC, ArchiveTableOfContentsHeadersABC +from relic.sga.checksums import validate_md5_checksum +from relic.sga.common import ArchiveVersion +from relic.sga.vX import APIvX + +version = ArchiveVersion.v2 + + +class _V2: + """Mixin to allow classes to add `version` from the module level to the class level""" + version = version # classvar = modulevar + + +@dataclass +class ArchiveToCPtr(ArchiveToCPtrABC, _V2): + LAYOUT = Struct("< LH LH LH LH") + + +@dataclass +class ArchiveHeader(ArchiveHeaderABC, _V2): + # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET + LAYOUT = Struct(f"< 16s 128s 16s 2L") + # The eigen value is a guid? 
also knew that layout looked familiar + MD5_EIGENVALUES = (b"E01519D6-2DB7-4640-AF54-0A23319C56C3", b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF") + toc_ptr: WindowPtr + checksums: Tuple[bytes, bytes] + + def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): + ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] + valid = True + indexes = (1,) if fast else (0, 1) + for i in indexes: + valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) + return valid + + @classmethod + @property + def version(cls) -> VersionLike: + return ArchiveVersion.Dow + + @classmethod + def unpack(cls, stream: BinaryIO) -> ArchiveHeader: + csum_a, name, csum_b, toc_size, data_offset = cls.LAYOUT.unpack_stream(stream) + + name = name.decode("utf-16-le").rstrip("\0") + toc_ptr = WindowPtr(offset=stream.tell(), size=toc_size) + data_ptr = WindowPtr(offset=data_offset, size=None) + return cls(name, toc_ptr, data_ptr, (csum_a, csum_b)) + + def pack(self, stream: BinaryIO) -> int: + args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset + return self.LAYOUT.pack_stream(stream, *args) + + def __eq__(self, other): + # TODO make issue to add equality to WindowPtr/Ptr + return self.name == other.name \ + and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ + and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ + and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] + + +class FileCompressionFlag(Enum): + # Compression flag is either 0 (Decompressed) or 16/32 which are both compressed + # Aside from 0; these appear to be the Window-Sizes for the Zlib Compression (In KibiBytes) + Decompressed = 0 + + Compressed16 = 16 + Compressed32 = 32 + + def compressed(self) -> bool: + return self != FileCompressionFlag.Decompressed + + +@dataclass +class FileHeader(FileHeaderABC, _V2): + # name + LAYOUT = Struct(f"<5L") + compression_flag: FileCompressionFlag + + def __eq__(self, other): + return self.compression_flag == other.compression_flag and super().__eq__(other) + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeader: + name_offset, compression_flag_value, data_offset, decompressed_size, compressed_size = cls.LAYOUT.unpack_stream(stream) + compression_flag = FileCompressionFlag(compression_flag_value) + name_ptr = Ptr(name_offset) + data_ptr = WindowPtr(data_offset, compressed_size) + return cls(name_ptr, data_ptr, decompressed_size, compressed_size, compression_flag) + + def pack(self, stream: BinaryIO) -> int: + return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.compression_flag.value, self.data_sub_ptr.offset, self.decompressed_size, self.compressed_size) + + @property + def compressed(self): + return self.compression_flag.compressed() + + +@dataclass +class FolderHeader(FolderHeaderABC, _V2): + LAYOUT = Struct("< L 4H") + + +@dataclass +class VirtualDriveHeader(VirtualDriveHeaderABC, _V2): + LAYOUT = Struct("< 64s 64s 4H 2s") + + +class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): + VDRIVE_HEADER_CLS = VirtualDriveHeader + FOLDER_HEADER_CLS = FolderHeader + FILE_HEADER_CLS = FileHeader + + +@dataclass(init=False) +class Archive(ArchiveABC, _V2): + TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr + TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = 
ArchiveTableOfContentsHeaders
+
+    def pack(self, stream: BinaryIO, write_magic: bool = True) -> int:
+        raise NotImplementedError
+
+
+# Class Aliases; don't need to be inherited
+File = abc_.FileABC
+Folder = abc_.FolderABC
+VirtualDrive = abc_.VirtualDriveABC
+
+
+class APIv2(APIvX, _V2):
+    ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders
+    ArchiveHeader = ArchiveHeader
+    FileHeader = FileHeader
+    FolderHeader = FolderHeader
+    VirtualDriveHeader = VirtualDriveHeader
+    Archive = Archive
+    ArchiveToCPtr = ArchiveToCPtr
+    File = File
+    Folder = Folder
+    VirtualDrive = VirtualDrive
diff --git a/src/relic/sga/v5.py b/src/relic/sga/v5.py
new file mode 100644
index 0000000..4efc36d
--- /dev/null
+++ b/src/relic/sga/v5.py
@@ -0,0 +1,157 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import BinaryIO, Tuple, ClassVar, Type
+
+from serialization_tools.ioutil import Ptr, WindowPtr
+from serialization_tools.structx import Struct
+
+from relic.common import VersionLike
+from relic.sga import v2
+from relic.sga.abc_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC, ArchiveToCPtrABC
+from relic.sga.checksums import validate_md5_checksum
+from relic.sga.common import ArchiveVersion
+from relic.sga import abc_
+from relic.sga.vX import APIvX
+
+version = ArchiveVersion.v5
+
+
+class _V5:
+    """Mixin to allow classes to add `version` from the module level to the class level"""
+    version = version  # classvar = modulevar  # THIS IS A COPY; NOT A REFERENCE!
+
+
+@dataclass
+class VirtualDriveHeader(VirtualDriveHeaderABC, _V5):
+    LAYOUT = Struct("< 64s 64s 4H 2s")
+
+
+@dataclass
+class ArchiveToCPtr(ArchiveToCPtrABC, _V5):
+    LAYOUT = v2.ArchiveToCPtr.LAYOUT
+
+
+@dataclass
+class FolderHeader(FolderHeaderABC, _V5):
+    LAYOUT = Struct("< L 4H")
+
+
+@dataclass
+class FileHeader(FileHeaderABC, _V5):
+    LAYOUT = Struct("<5L H")
+    unk_a: int
+    unk_b: int
+
+    @property
+    def compressed(self):
+        return self.compressed_size < self.decompressed_size
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> FileHeader:
+        name_off, data_off, comp_size, decomp_size, unk_a, unk_b = cls.LAYOUT.unpack_stream(stream)
+        # Name, File, Compressed, Decompressed, ???, ???
+        name_ptr = Ptr(name_off)
+        data_ptr = Ptr(data_off)
+        return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b)
+
+    def pack(self, stream: BinaryIO) -> int:
+        return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b)
+
+    def __eq__(self, other):
+        return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other)
+
+
+@dataclass
+class ArchiveHeader(ArchiveHeaderABC, _V5):
+    # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET, TOC_POS, RESERVED:1, RESERVED:0?, UNK???
+    LAYOUT = Struct("< 16s 128s 16s 3L 3L")
+    # Copied from DowI, may be different; praying it isn't
+    # UGH, THEY'RE DIFFERENT! Or the way to calculate them is different
+    # First, let's try no eigen  # (None, None)  # HAH TROLLED MYSELF, forgot to convert checksum to hex
+    MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii"))
+    toc_ptr: WindowPtr
+    checksums: Tuple[bytes, bytes]
+    unk: int
+
+    # This may not mirror DowI one-to-one, until it's verified, it stays here
+    # noinspection DuplicatedCode
+    def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True):
+        ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr]
+        valid = True
+        indexes = (1,) if fast else (0, 1)
+        for i in indexes:
+            valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert)
+        return valid
+
+    def __eq__(self, other):
+        # TODO make issue to add equality to WindowPtr/Ptr
+        return self.name == other.name and self.unk == other.unk \
+               and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \
+               and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \
+               and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1]
+
+    @property
+    def version(self) -> VersionLike:
+        return ArchiveVersion.Dow2
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> 'ArchiveHeader':
+        csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream)
+
+        assert rsv_1 == 1
+        assert rsv_0 == 0
+
+        name = name.decode("utf-16-le").rstrip("\0")
+        toc_ptr = WindowPtr(offset=toc_pos, size=toc_size)
+        data_ptr = WindowPtr(offset=data_offset)
+
+        return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk)
+
+    def pack(self, stream: BinaryIO) -> int:
+        args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk
+        return self.LAYOUT.pack_stream(stream, *args)
+
+
+# noinspection DuplicatedCode
+# Code is identical; but meaning is completely different; using _V5 instead of _V2
+class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC):
+    VDRIVE_HEADER_CLS = VirtualDriveHeader
+    FOLDER_HEADER_CLS = FolderHeader
+    FILE_HEADER_CLS = FileHeader
+
+
+@dataclass(init=False)
+class Archive(ArchiveABC, _V5):
+    TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr
+    TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders
+
+    def pack(self, stream: BinaryIO, write_magic: bool = True) -> int:
+        raise NotImplementedError
+
+
+File = abc_.FileABC
+Folder = abc_.FolderABC
+VirtualDrive = abc_.VirtualDriveABC
+
+
+class APIv5(APIvX, _V5):
+    ArchiveHeader = ArchiveHeader
+    ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders
+    FileHeader = FileHeader
+    FolderHeader = FolderHeader
+    VirtualDriveHeader = VirtualDriveHeader
+    Archive = Archive
+    ArchiveToCPtr = ArchiveToCPtr
+    File = File
+    Folder = Folder
+    VirtualDrive = VirtualDrive
diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py
new file mode 100644
index 0000000..cee9499
--- /dev/null
+++ b/src/relic/sga/v9.py
@@ -0,0 +1,154 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import BinaryIO
+
+from serialization_tools.ioutil import Ptr, WindowPtr +from serialization_tools.structx import Struct + +from relic.common import VersionLike +from relic.sga import abc_ +from relic.sga.abc_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC +from relic.sga.common import ArchiveVersion +from relic.sga.protocols import ArchiveHeader +from relic.sga.vX import APIvX + +version = ArchiveVersion.v9 + + +class _V9: + """Mixin to allow classes to add `version` from the module level to the class level""" + version = version # classvar = modulevar + + +@dataclass +class VirtualDriveHeader(VirtualDriveHeaderABC, _V9): + LAYOUT = Struct("< 64s 64s 4L 4s") + + +@dataclass +class ArchiveToCPtr(ArchiveToCPtrABC, _V9): + LAYOUT = Struct("< 8L") + + +@dataclass +class FolderHeader(FolderHeaderABC, _V9): + LAYOUT = Struct("< 5L") + + +@dataclass +class FileHeader(FileHeaderABC, _V9): + LAYOUT = Struct("< 7L H L") + unk_a: int + unk_b: int + unk_c: int + unk_d: int # 256? + unk_e: int + + def __eq__(self, other): + return self.unk_a == other.unk_a and self.unk_b == other.unk_b and self.unk_c == other.unk_c and self.unk_d == other.unk_d and self.unk_e == other.unk_e and super().__eq__(other) + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeader: + name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e = cls.LAYOUT.unpack_stream(stream) + # assert unk_a == 0, (unk_a, 0) + # assert unk_b == 0, (unk_b, 0) + # UNK_D is a new compression flag?! + # if comp_size != decomp_size: + # assert unk_d in [256,512], ((comp_size, decomp_size), (unk_d, [256,512]), (name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e)) + # Pulling stuff out of my ass; but dividing them by the max block size gets you 7, 6 respectively + # Name, File, Compressed, Decompressed, ???, ??? + name_ptr = Ptr(name_off) + data_ptr = Ptr(data_off) + return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d, unk_e) + + def pack(self, stream: BinaryIO) -> int: + args = self.name_sub_ptr.offset, self.unk_a, self.data_sub_ptr.offset, self.unk_b, self.compressed_size, self.decompressed_size, self.unk_c, self.unk_d, self.unk_e + return self.LAYOUT.pack_stream(stream, *args) + + @property + def compressed(self): + return self.compressed_size < self.decompressed_size + + +@dataclass +class ArchiveHeader(ArchiveHeaderABC, _V9): + # name, TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK??? + LAYOUT = Struct(f"<128s Q L Q L 3L 256s") + toc_ptr: WindowPtr + data_ptr: WindowPtr + + unk: bytes + + def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool: + """ + Dawn of War III does not contain any checksums, and so will always return true. 
+ + :param stream: Ignored + :param fast: Ignored + :param _assert: Ignored + :returns: True + """ + return True + + @property + def version(self) -> VersionLike: + return ArchiveVersion.Dow3 + + @classmethod + def unpack(cls, stream: BinaryIO) -> ArchiveHeader: + name, toc_pos, toc_size, data_pos, data_size, rsv_0_a, rsv_1, rsv_0_b, unk = cls.LAYOUT.unpack_stream(stream) + + assert rsv_1 == 1 + assert rsv_0_a == 0 + assert rsv_0_b == 0 + + toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) + data_ptr = WindowPtr(offset=data_pos, size=data_size) + name = name.decode("utf-16-le").rstrip("\0") + + return cls(name, toc_ptr, data_ptr, unk) + + def pack(self, stream: BinaryIO) -> int: + args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 0, 1, 0, self.unk + return self.LAYOUT.pack_stream(stream, *args) + + def __eq__(self, other): + # TODO make issue to add equality to WindowPtr/Ptr + return self.name == other.name and self.unk == other.unk \ + and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ + and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ + and self.version == other.version + + +File = abc_.FileABC +Folder = abc_.FolderABC +VirtualDrive = abc_.VirtualDriveABC + + +class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): + VDRIVE_HEADER_CLS = VirtualDriveHeader + FOLDER_HEADER_CLS = FolderHeader + FILE_HEADER_CLS = FileHeader + + +@dataclass(init=False) +class Archive(ArchiveABC, _V9): + TOC_PTR_CLS = ArchiveToCPtr + TOC_HEADERS_CLS = ArchiveTableOfContentsHeaders + + def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: + raise NotImplementedError + + +class APIv9(APIvX, _V9): + ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders + ArchiveHeader = ArchiveHeader + FileHeader = FileHeader + FolderHeader = FolderHeader + VirtualDriveHeader = VirtualDriveHeader + Archive = Archive + ArchiveToCPtr = ArchiveToCPtr + File = File + Folder = Folder + VirtualDrive = VirtualDrive diff --git a/src/relic/sga/vX.py b/src/relic/sga/vX.py new file mode 100644 index 0000000..c6036b6 --- /dev/null +++ b/src/relic/sga/vX.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from types import ModuleType +from typing import Type, Protocol, ClassVar + +from relic.sga import abc_, protocols +from relic.sga.common import ArchiveVersion + + +class APIvX(Protocol): + """ + Allows us to have a TYPED OBJECT with required types for each version + + """ + + version:ClassVar[ArchiveVersion] + # Archive + Archive: Type[protocols.Archive] + ArchiveHeader: Type[protocols.ArchiveHeader] + # Table Of Contents + ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC] + ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC] + # Files + FileHeader: Type[abc_.FileHeaderABC] + File: Type[protocols.File] + # Folders + FolderHeader: Type[abc_.FolderHeaderABC] + Folder: Type[protocols.Folder] + # VDrive + VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC] + VirtualDrive: Type[protocols.VirtualDrive] + + +"""Modules implementing vX should define all of the following attributes""" +required_attrs = APIvX.__annotations__.keys() + + +def is_module_api(module: ModuleType): + return all(hasattr(module, attr) for attr in required_attrs) diff --git a/src/relic/sga/vdrive/__init__.py b/src/relic/sga/vdrive/__init__.py deleted file mode 100644 index d105b6e..0000000 --- a/src/relic/sga/vdrive/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ 
-from .virtual_drive import VirtualDrive -from .header import VirtualDriveHeader, DowIVirtualDriveHeader, DowIIVirtualDriveHeader, DowIIIVirtualDriveHeader - -__all__ = [ - "VirtualDrive", - "VirtualDriveHeader", - "DowIVirtualDriveHeader", - "DowIIVirtualDriveHeader", - "DowIIIVirtualDriveHeader", -] diff --git a/src/relic/sga/vdrive/header.py b/src/relic/sga/vdrive/header.py deleted file mode 100644 index 547b816..0000000 --- a/src/relic/sga/vdrive/header.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import ClassVar, BinaryIO, Dict, Type - -from serialization_tools.structx import Struct - -from ...common import VersionLike -from ..common import ArchiveVersion, ArchiveRange - - -@dataclass -class VirtualDriveHeader: - LAYOUT: ClassVar[Struct] - - path: str - name: str - - sub_folder_range: ArchiveRange - file_range: ArchiveRange - unk: bytes - - @classmethod - def unpack(cls, stream: BinaryIO, version: VersionLike) -> 'VirtualDriveHeader': - header_class = _HEADER_VERSION_MAP.get(version) - - if not header_class: - raise NotImplementedError(version) - - return header_class._unpack(stream) - - def _pack(self, stream: BinaryIO) -> int: - args = self.path.encode("ascii"), self.name.encode("ascii"), self.sub_folder_range.start, self.sub_folder_range.end, \ - self.file_range.start, self.file_range.end, 0 - return self.LAYOUT.pack_stream(stream, *args) - - @classmethod - def _unpack(cls, stream: BinaryIO) -> 'VirtualDriveHeader': - path, name, sub_folder_start, sub_folder_end, file_start, file_end, unk = cls.LAYOUT.unpack_stream(stream) - path, name = path.decode("ascii").rstrip("\00"), name.decode("ascii").rstrip("\00") - sub_folder_range = ArchiveRange(sub_folder_start, sub_folder_end) - file_range = ArchiveRange(file_start, file_end) - return cls(path, name, sub_folder_range, file_range, unk) - - -@dataclass -class DowIVirtualDriveHeader(VirtualDriveHeader): - LAYOUT = Struct("< 64s 64s 4H 2s") - - -@dataclass -class DowIIVirtualDriveHeader(VirtualDriveHeader): - LAYOUT = Struct("< 64s 64s 4H 2s") - - -@dataclass -class DowIIIVirtualDriveHeader(VirtualDriveHeader): - LAYOUT = Struct("< 64s 64s 4L 4s") - - -_HEADER_VERSION_MAP: Dict[VersionLike, Type[VirtualDriveHeader]] = { - ArchiveVersion.Dow: DowIVirtualDriveHeader, - ArchiveVersion.Dow2: DowIIVirtualDriveHeader, - ArchiveVersion.Dow3: DowIIIVirtualDriveHeader -} diff --git a/src/relic/sga/vdrive/virtual_drive.py b/src/relic/sga/vdrive/virtual_drive.py deleted file mode 100644 index 3a8da0d..0000000 --- a/src/relic/sga/vdrive/virtual_drive.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import PurePosixPath -from typing import List, TYPE_CHECKING - -from ..hierarchy import FileCollection, FolderCollection, ArchiveWalk, walk - -if TYPE_CHECKING: - from ..file.file import File - from ..folder.folder import Folder - from ..vdrive.header import VirtualDriveHeader - from ..toc.toc import ArchiveTableOfContents - - -@dataclass -class VirtualDrive(FolderCollection, FileCollection): - header: VirtualDriveHeader - - def __init__(self, header: VirtualDriveHeader, sub_folders: List[Folder], files: List[File]): - self.header = header - self.sub_folders = sub_folders - self.files = files - - @property - def path(self) -> str: - return self.header.path - - @property - def name(self) -> str: - return self.header.name - - def walk(self) -> ArchiveWalk: - return walk(self) - - @property - def full_path(self) -> 
PurePosixPath:
-        return PurePosixPath(self.path + ":")
-
-    @classmethod
-    def create(cls, header: VirtualDriveHeader) -> VirtualDrive:
-        folders = [None] * header.sub_folder_range.size
-        files = [None] * header.file_range.size
-        # noinspection PyTypeChecker
-        return VirtualDrive(header, folders, files)
-
-    def load_toc(self, toc: ArchiveTableOfContents):
-        self.load_folders(toc.folders)
-        self.load_files(toc.files)
-
-    def load_folders(self, folders: List[Folder]):
-        if self.header.sub_folder_range.start < len(folders):
-            for folder_index in self.header.sub_folder_range:
-                sub_folder_index = folder_index - self.header.sub_folder_range.start
-                f = self.sub_folders[sub_folder_index] = folders[folder_index]
-                f._drive = self
-
-    def load_files(self, files: List[File]):
-        if self.header.file_range.start < len(files):
-            for file_index in self.header.file_range:
-                sub_file_index = file_index - self.header.file_range.start
-                f = self.files[sub_file_index] = files[file_index]
-                f._drive = self
-
-    def build_tree(self):
-        self.sub_folders = [f for f in self.sub_folders if not f._parent]
-        self.files = [f for f in self.files if not f._parent]
diff --git a/src/scripts/dump_sga.py b/src/scripts/dump_sga.py
index 108849c..80bebdc 100644
--- a/src/scripts/dump_sga.py
+++ b/src/scripts/dump_sga.py
@@ -5,9 +5,10 @@
 from serialization_tools.walkutil import BlackList, WhiteList, filter_by_path, filter_by_file_extension, collapse_walk_on_files
 
+import relic.sga.io
 from relic.config import DowIIIGame, DowIIGame, DowGame, filter_latest_dow_game, get_dow_root_directories
-from relic.sga.archive import ArchiveMagicWord, Archive
+from relic.sga.common.archive import ArchiveMagicWord, Archive
 
 
 def __safe_makedirs(path: str, use_dirname: bool = True):
@@ -29,7 +30,7 @@ def walk_archive_paths(folder: os.PathLike, extensions: WhiteList = None, whitel
     walk = os.walk(folder)
     walk = filter_by_path(walk, whitelist=whitelist, blacklist=blacklist, prune=True)
     walk = filter_by_file_extension(walk, whitelist=extensions)
-    walk = ArchiveMagicWord.walk(walk)
+    walk = relic.sga.io.walk(walk)
     return collapse_walk_on_files(walk)
 
@@ -40,11 +41,11 @@ def dump_archive(input_folder: os.PathLike, output_folder: os.PathLike, overwrit
     output_folder_path = Path(output_folder)
     for input_file_path in walk_archive_paths(input_folder):
         with open(input_file_path, "rb") as in_handle:
-            archive = Archive.unpack(in_handle)
+            archive = relic.sga.io.unpack_archive(in_handle)
             archive_name = splitext(basename(input_file_path))[0]
             with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream:
                 print(f"\tDumping '{archive_name}'")
-                for _, _, _, files in archive.walk():
+                for _, _, _, files in relic.sga.io.walk(archive):
                     for file in files:
                         relative_file_path = file.full_path
diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py
index 0d5f5c2..00b7c67 100644
--- a/src/scripts/universal/sga/unpack.py
+++ b/src/scripts/universal/sga/unpack.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Dict
 
+import relic.sga.io
 from relic.sga import Archive
 from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser
 from scripts.universal.sga.common import get_runner
@@ -25,11 +26,11 @@ def extract_args(args: argparse.Namespace) -> Dict:
 def unpack_archive(in_path: str, out_path: str, print_opts: PrintOptions = None, prepend_archive_path: bool = True, indent_level: int = 0, **kwargs):
     out_path = Path(out_path)
     with open(in_path, "rb") as in_handle:
-        archive = Archive.unpack(in_handle)
+        archive = relic.sga.io.unpack_archive(in_handle)
         archive_name = splitext(basename(in_path))[0]
         with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream:
             print_any(f"Unpacking \"{archive_name}\"...", indent_level, print_opts)
-            for _, _, _, files in archive.walk():
+            for _, _, _, files in relic.sga.io.walk(archive):
                 for file in files:
                     try:
                         relative_file_path = file.full_path
diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py
index 585848b..529105c 100644
--- a/tests/relic/sga/archive/test_archive.py
+++ b/tests/relic/sga/archive/test_archive.py
@@ -3,21 +3,22 @@
 
 import pytest
 
-from relic.sga.archive import Archive, ArchiveMagicWord
-from relic.sga.hierarchy import ArchiveWalk
+from relic.sga.abc_ import ArchiveABC
+from relic.sga.common import ArchiveMagicWord
+from relic.sga.protocols import ArchiveWalk
 from tests.helpers import TF
 from tests.relic.sga.datagen import DowII, DowI, DowIII
 
 
 class ArchiveTests:
-    def assert_equal(self, expected: Archive, result: Archive, sparse: bool):
+    def assert_equal(self, expected: ArchiveABC, result: ArchiveABC, sparse: bool):
         assert expected.header == result.header
         if sparse:
             assert result._sparse
 
     # TODO
     @abstractmethod
-    def test_walk(self, archive: Archive, expected: ArchiveWalk):
+    def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk):
         archive_walk = archive.walk()
         for (a_vdrive, a_folder, a_folders, a_files), (e_vdrive, e_folder, e_folders, e_files) in zip(archive_walk, expected):
             assert a_vdrive == e_vdrive
@@ -26,27 +27,27 @@ def test_walk(self, archive: Archive, expected: ArchiveWalk):
             assert a_files == e_files
 
     @abstractmethod
-    def test_inner_unpack(self, stream_data: bytes, expected: Archive):
+    def test_unpack(self, stream_data: bytes, expected: ArchiveABC):
         for sparse in TF:
             with BytesIO(stream_data) as stream:
-                archive = expected.__class__._unpack(stream, expected.header, sparse)
+                archive = expected.__class__.unpack(stream, expected.header, sparse)
                 assert expected.__class__ == archive.__class__
                 self.assert_equal(expected, archive, sparse)
 
     @abstractmethod
-    def test_unpack(self, stream_data: bytes, expected: Archive, valid_checksums: bool):
+    def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool):
         for read_magic in TF:
             for sparse in TF:
                 for validate in ([False] if not valid_checksums else TF):
                     with BytesIO(stream_data) as stream:
                         if not read_magic:
                             stream.seek(ArchiveMagicWord.layout.size)
-                        archive = Archive.unpack(stream, read_magic, sparse, validate=validate)
+                        archive = ArchiveABC.unpack(stream, read_magic, sparse, validate=validate)
                         assert expected.__class__ == archive.__class__
                         self.assert_equal(expected, archive, sparse)
 
     @abstractmethod
-    def test_pack(self, archive: Archive, expected: bytes):
+    def test_pack(self, archive: ArchiveABC, expected: bytes):
         for write_magic in TF:
             try:
                 with BytesIO() as stream:
@@ -75,22 +76,22 @@ def DOW1_ARCHIVE_WALK() -> ArchiveWalk:
 
 class TestDowIArchive(ArchiveTests):
     @pytest.mark.parametrize(["stream_data", "expected"],
                              [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)])
-    def test_inner_unpack(self, stream_data: bytes, expected: Archive):
-        super().test_inner_unpack(stream_data, expected)
+    def test_unpack(self, stream_data: bytes, expected: ArchiveABC):
+        super().test_unpack(stream_data, expected)
 
-    @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"],
-                             [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE, True)])
-    def test_unpack(self, stream_data: bytes, expected: Archive, valid_checksums: bool):
-
super().test_unpack(stream_data, expected, valid_checksums) + # @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], + # [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE, True)]) + # def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): + # super().old_test_unpack(stream_data, expected, valid_checksums) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): + def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_ARCHIVE, DOW1_ARCHIVE_WALK())]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): + def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -112,22 +113,22 @@ def DOW2_ARCHIVE_WALK() -> ArchiveWalk: class TestDowIIArchive(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) - def test_inner_unpack(self, stream_data: bytes, expected: Archive): - super().test_inner_unpack(stream_data, expected) + def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + super().test_unpack(stream_data, expected) @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE, True)]) - def test_unpack(self, stream_data: bytes, expected: Archive, valid_checksums: bool): - super().test_unpack(stream_data, expected, valid_checksums) + def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): + super().old_test_unpack(stream_data, expected, valid_checksums) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): + def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_ARCHIVE, DOW2_ARCHIVE_WALK())]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): + def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -149,20 +150,20 @@ def DOW3_ARCHIVE_WALK() -> ArchiveWalk: class TestDowIIIArchive(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) - def test_inner_unpack(self, stream_data: bytes, expected: Archive): - super().test_inner_unpack(stream_data, expected) + def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + super().test_unpack(stream_data, expected) @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE, True)]) - def test_unpack(self, stream_data: bytes, expected: Archive, valid_checksums: bool): - super().test_unpack(stream_data, expected, valid_checksums) + def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): + super().old_test_unpack(stream_data, expected, valid_checksums) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): + def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_ARCHIVE, DOW3_ARCHIVE_WALK())]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): + def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, 
expected) diff --git a/tests/relic/sga/archive/test_archive_header.py b/tests/relic/sga/archive/test_archive_header.py index a240d71..7520ec3 100644 --- a/tests/relic/sga/archive/test_archive_header.py +++ b/tests/relic/sga/archive/test_archive_header.py @@ -1,47 +1,50 @@ from abc import abstractmethod from io import BytesIO -from typing import List +from typing import List, Type import pytest from serialization_tools.ioutil import WindowPtr, Ptr from serialization_tools.size import KiB, MiB, GiB -from tests.relic.sga.datagen import DowI, DowII, DowII, DowIII -from tests.helpers import TF +import relic.sga.io from relic.common import Version -from relic.sga import ArchiveHeader, ArchiveVersion, DowIIIArchiveHeader, ArchiveMagicWord -from relic.sga.archive import header +from relic.sga.common import ArchiveMagicWord, ArchiveVersion +from relic.sga import protocols as proto, v2, v5, v9 +from relic.sga.checksums import gen_md5_checksum, validate_md5_checksum +from relic.sga.v9 import ArchiveHeader +from tests.relic.sga.datagen import DowI, DowII, DowIII +from tests.helpers import TF class ArchiveHeaderTests: @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_validate_checksums(self, archive: bytes): + def test_validate_checksums(self, archive: bytes, cls: Type[proto.ArchiveHeader]): for fast in TF: for _assert in TF: with BytesIO(archive) as stream: - archive_header = ArchiveHeader.unpack(stream) + archive_header = cls.unpack(stream) archive_header.validate_checksums(stream, fast=fast, _assert=_assert) @abstractmethod # Trick PyCharm into requiring us to redefine this def test_version(self, archive: ArchiveHeader, expected: Version): assert archive.version == expected - @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): + # @abstractmethod # Trick PyCharm into requiring us to redefine this + def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): with BytesIO(buffer) as stream: - result = expected.__class__._unpack(stream) + result = relic.sga.io.unpack_archive(stream) assert result == expected - @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_private_pack(self, inst: ArchiveHeader, expected: bytes): + # @abstractmethod # Trick PyCharm into requiring us to redefine this + def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): with BytesIO() as stream: - inst._pack(stream) + inst.pack(stream) stream.seek(0) result = stream.read() assert result == expected - @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): + # @abstractmethod # Trick PyCharm into requiring us to redefine this + def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): for read_magic in TF: with BytesIO(buffer) as stream: if not read_magic: @@ -58,8 +61,22 @@ def test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bo assert expected.__class__ == unpacked.__class__ assert expected == unpacked + @abstractmethod + def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): + with BytesIO(buffer) as stream: + unpacked = expected.__class__.unpack(stream) + assert expected == unpacked + + @abstractmethod + def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): + with BytesIO() as stream: + written = inst.pack(stream) + stream.seek(0) + packed = stream.read() + assert expected == 
packed + @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_pack(self, inst: ArchiveHeader, expected: bytes): + def old_test_pack(self, inst: proto.ArchiveHeader, expected: bytes): magic_size = ArchiveMagicWord.layout.size for write_magic in TF: with BytesIO() as stream: @@ -89,7 +106,7 @@ def test_gen_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr for buffer_size in buffer_sizes: with BytesIO(stream_data) as stream: - result = header._gen_md5_checksum(stream, eigen, buffer_size, ptr) + result = gen_md5_checksum(stream, eigen, buffer_size, ptr) assert md5_checksum == result @@ -105,7 +122,7 @@ def test_validate_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: b for buffer_size in buffer_sizes: try: with BytesIO(stream_data) as stream: - result = header._validate_md5_checksum(stream, ptr, eigen, md5_checksum, buffer_size, _assert) + result = validate_md5_checksum(stream, ptr, eigen, md5_checksum, buffer_size, _assert) # Own lines to make assertions clearer except AssertionError as e: if not fail_expected: # MD5 mismatch; if fail_expected we @@ -130,41 +147,29 @@ def fast_dow1_archive_header(name, toc_pos, bad_magic: bytes): DOW1_HEADER_INNER, DOW1_HEADER_INNER_DATA, _ = fast_dow1_archive_header("Dawn Of War 1 Test Header (Inner Pack)", 168, b"deaddead") DOW1_ARCHIVE_BUFFER = DowI.gen_sample_archive_buffer("Dawn Of War 1 Test Archive", "Tests", "Dow1 Header Tests.txt", b"You thought this was a test, but it was me, DIO!") +HDR_START = 12 # Most logic now doesn't handle Magic + Version + class TestDowIArchiveHeader(ArchiveHeaderTests): @pytest.mark.parametrize( - ["archive"], - [(DOW1_ARCHIVE_BUFFER,)]) - def test_validate_checksums(self, archive: bytes): - super().test_validate_checksums(archive) + ["archive", "cls"], + [(DOW1_ARCHIVE_BUFFER[HDR_START:], v2.ArchiveHeader)]) + def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): + super().test_validate_checksums(archive, cls) @pytest.mark.parametrize( ["expected", "inst"], - [(DOW1_HEADER_INNER_DATA[12:], DOW1_HEADER_INNER)] + [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] ) - def test_private_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_private_pack(inst, expected) + def test_pack(self, inst: ArchiveHeader, expected: bytes): + super().test_pack(inst, expected) @pytest.mark.parametrize( ["buffer", "expected"], - [(DOW1_HEADER_INNER_DATA[12:], DOW1_HEADER_INNER)] + [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] ) - def test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - super().test_private_unpack(buffer, expected) - - @pytest.mark.parametrize( - ["buffer", "expected", "bad_magic_word"], - [(DOW1_HEADER_DATA, DOW1_HEADER, False), - (DOW1_HEADER_DATA_BAD_MAGIC, DOW1_HEADER, True)] - ) - def test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - super().test_unpack(buffer, expected, bad_magic_word) - - @pytest.mark.parametrize( - ["inst", "expected"], - [(DOW1_HEADER, DOW1_HEADER_DATA)]) - def test_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) + def test_unpack(self, buffer: bytes, expected: ArchiveHeader): + super().test_unpack(buffer, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, ArchiveVersion.Dow)]) def test_version(self, archive: ArchiveHeader, expected: Version): @@ -188,36 +193,36 @@ class TestDowIIArchiveHeader(ArchiveHeaderTests): 
["expected", "inst"], [(DOW2_HEADER_DATA[12:], DOW2_HEADER)], ) - def test_private_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_private_pack(inst, expected) + def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): + super().old_test_private_pack(inst, expected) @pytest.mark.parametrize( ["buffer", "expected"], [(DOW2_HEADER_DATA[12:], DOW2_HEADER)], ) - def test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - super().test_private_unpack(buffer, expected) + def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): + super().old_test_private_unpack(buffer, expected) @pytest.mark.parametrize( ["buffer", "expected", "bad_magic_word"], [(DOW2_HEADER_DATA, DOW2_HEADER, False), (DOW2_HEADER_DATA_BAD_MAGIC, DOW2_HEADER, True)], ) - def test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - super().test_unpack(buffer, expected, bad_magic_word) + def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): + super().old_test_unpack(buffer, expected, bad_magic_word) @pytest.mark.parametrize( ["inst", "expected"], [(DOW2_HEADER, DOW2_HEADER_DATA)]) - def test_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) + def old_test_pack(self, inst: ArchiveHeader, expected: bytes): + super().old_test_pack(inst, expected) @pytest.mark.parametrize( - ["archive"], - [(DOW2_ARCHIVE_BUFFER,)], + ["archive", "cls"], + [(DOW2_ARCHIVE_BUFFER, v5.ArchiveHeader)], ) - def test_validate_checksums(self, archive: bytes): - super().test_validate_checksums(archive) + def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]): + super().test_validate_checksums(archive, cls) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, ArchiveVersion.Dow2)]) def test_version(self, archive: ArchiveHeader, expected: Version): @@ -234,41 +239,41 @@ def fast_dow3_archive_header(name, bad_magic: bytes): class TestDowIIIArchiveHeader(ArchiveHeaderTests): @pytest.mark.parametrize( - ["archive"], - [(None,)]) - def test_validate_checksums(self, archive: bytes): + ["archive", "cls"], + [(None, v9.ArchiveHeader)]) + def test_validate_checksums(self, archive: bytes, cls: Type[v9.ArchiveHeader]): for fast in TF: for _assert in TF: # HACK but if it fails it means logic has changed - assert DowIIIArchiveHeader.validate_checksums(None, None, fast=fast, _assert=_assert) + assert cls.validate_checksums(None, None, fast=fast, _assert=_assert) @pytest.mark.parametrize( ["expected", "inst"], [(DOW3_HEADER_DATA[12:], DOW3_HEADER)], ) - def test_private_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_private_pack(inst, expected) + def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): + super().old_test_private_pack(inst, expected) @pytest.mark.parametrize( ["buffer", "expected"], [(DOW3_HEADER_DATA[12:], DOW3_HEADER)], ) - def test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - super().test_private_unpack(buffer, expected) + def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): + super().old_test_private_unpack(buffer, expected) @pytest.mark.parametrize( ["buffer", "expected", "bad_magic_word"], [(DOW3_HEADER_DATA, DOW3_HEADER, False), (DOW3_HEADER_DATA_BAD_MAGIC, DOW3_HEADER, True)], ) - def test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - super().test_unpack(buffer, expected, bad_magic_word) + def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, 
bad_magic_word: bool): + super().old_test_unpack(buffer, expected, bad_magic_word) @pytest.mark.parametrize( ["inst", "expected"], [(DOW3_HEADER, DOW3_HEADER_DATA)]) - def test_pack(self, inst: ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) + def old_test_pack(self, inst: ArchiveHeader, expected: bytes): + super().old_test_pack(inst, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, ArchiveVersion.Dow3)]) def test_version(self, archive: ArchiveHeader, expected: Version): diff --git a/tests/relic/sga/datagen.py b/tests/relic/sga/datagen.py index 83c7eb3..6fbab13 100644 --- a/tests/relic/sga/datagen.py +++ b/tests/relic/sga/datagen.py @@ -3,10 +3,10 @@ from serialization_tools.ioutil import WindowPtr, Ptr -from relic.sga import ArchiveHeader, DowIArchiveHeader, DowIIArchiveHeader, DowIIIArchiveHeader, VirtualDrive, Folder, File, DowIIArchive, DowIArchive, DowIIIArchive, \ - DowIIIFolderHeader, DowIIIFileHeader, DowIIIVirtualDriveHeader, DowIVirtualDriveHeader, DowIFolderHeader, DowIFileHeader, FileCompressionFlag, DowIIFolderHeader, DowIIVirtualDriveHeader, DowIIFileHeader +from relic.sga.protocols import ArchiveHeader +from relic.sga.abc_ import FileABC, FolderABC, VirtualDriveABC, ArchiveTOC +from relic.sga import v2, v5, v9 from relic.sga.common import ArchiveRange -from relic.sga.toc.toc import ArchiveTOC def encode_and_pad(v: str, byte_size: int, encoding: str) -> bytes: @@ -39,7 +39,7 @@ class DowI: @staticmethod def gen_archive_header(name: str, toc_size: int, data_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, toc_pos: int = 180) -> ArchiveHeader: - return DowIArchiveHeader(name, WindowPtr(toc_pos, toc_size), WindowPtr(data_offset), csums) + return v2.ArchiveHeader(name, WindowPtr(toc_pos, toc_size), WindowPtr(data_offset), csums) @staticmethod def gen_archive_header_buffer(name: str, toc_size: int, data_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, magic: bytes = b"_ARCHIVE") -> bytes: @@ -50,41 +50,41 @@ def gen_archive_header_buffer(name: str, toc_size: int, data_offset: int, csums: return magic + version + csums[0] + encoded_name + csums[1] + encoded_toc_size + encoded_data_offset @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> DowIVirtualDriveHeader: - return DowIVirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v2.VirtualDriveHeader: + return v2.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) @staticmethod def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK): return encode_and_pad(path, 64, "ascii") + encode_and_pad(name, 64, "ascii") + ushort(subfolder_offset) + ushort(subfolder_offset + subfolder_count) + ushort(file_offset) + ushort(file_count + file_offset) + unk @staticmethod - def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> 
DowIFolderHeader: - return DowIFolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v2.FolderHeader: + return v2.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) @staticmethod def gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> bytes: return uint(name_offset) + ushort(subfolder_offset) + ushort(subfolder_offset + subfolder_count) + ushort(file_offset) + ushort(file_count + file_offset) @staticmethod - def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: FileCompressionFlag = None) -> DowIFileHeader: + def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: v2.FileCompressionFlag = None) -> v2.FileHeader: if comp_size is None: comp_size = decomp_size if comp_flag is None: if comp_size != decomp_size: - comp_flag = FileCompressionFlag.Compressed16 # IDK, just choose one + comp_flag = v2.FileCompressionFlag.Compressed16 # IDK, just choose one else: - comp_flag = FileCompressionFlag.Decompressed - return DowIFileHeader(Ptr(name_offset), WindowPtr(data_offset, comp_size), decomp_size, comp_size, comp_flag) + comp_flag = v2.FileCompressionFlag.Decompressed + return v2.FileHeader(Ptr(name_offset), WindowPtr(data_offset, comp_size), decomp_size, comp_size, comp_flag) @staticmethod - def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: FileCompressionFlag = None) -> bytes: + def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: v2.FileCompressionFlag = None) -> bytes: if comp_size is None: comp_size = decomp_size if comp_flag is None: if comp_size != decomp_size: - comp_flag = FileCompressionFlag.Compressed16 # IDK, just choose one + comp_flag = v2.FileCompressionFlag.Compressed16 # IDK, just choose one else: - comp_flag = FileCompressionFlag.Decompressed + comp_flag = v2.FileCompressionFlag.Decompressed return uint(name_offset) + uint(comp_flag.value) + uint(data_offset) + uint(decomp_size) + uint(comp_size) @staticmethod @@ -113,7 +113,7 @@ def gen_toc_ptr_buffer(vdrive: Tuple[int, int], folders: Tuple[int, int], files: return b"".join(parts) @staticmethod - def gen_toc(vdrive: VirtualDrive, folder: Folder, file: File, names: Dict[int, str]) -> ArchiveTOC: + def gen_toc(vdrive: VirtualDriveABC, folder: FolderABC, file: FileABC, names: Dict[int, str]) -> ArchiveTOC: return ArchiveTOC([vdrive], [folder], [file], names) @classmethod @@ -146,7 +146,7 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi return cls.gen_archive_buffer(archive_name, toc_ptr_buf, toc_buf, file_uncomp_data, magic) @classmethod - def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> DowIArchive: + def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> v2.Archive: def dirty_toc_hack(): name_buf, name_offsets = cls.gen_name_buffer(folder, file) vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) @@ -177,13 
+177,13 @@ def gen_csum(buffer: bytes, eigen: bytes) -> bytes: vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = File(file_h, file, file_uncomp_data, True) - folder_ = Folder(folder_h, folder, [], [file_]) - vdrive_ = VirtualDrive(vdrive_h, [folder_], [file_]) - folder_._drive = file_._drive = vdrive_ - file_._parent = folder_ + file_ = v2.File(file_h, file, file_uncomp_data, True) + folder_ = v2.Folder(folder_h, folder, [], [file_]) + vdrive_ = v2.VirtualDrive(vdrive_h, [folder_], [file_]) + folder_.parent_drive = file_.parent_drive = vdrive_ + file_.parent_folder = folder_ header = cls.gen_archive_header(archive_name, len(toc_buf), len(toc_buf) + toc_pos, csums, toc_pos) - return DowIArchive(header, [vdrive_], False) + return v2.Archive(header, [vdrive_], False) class DowII: @@ -195,7 +195,7 @@ class DowII: @classmethod def gen_archive_header(cls, name: str, toc_size: int, data_offset: int, toc_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS) -> ArchiveHeader: - return DowIIArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset), csums, cls.ARCHIVE_HEADER_UNK_INT) + return v5.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset), csums, cls.ARCHIVE_HEADER_UNK_INT) @classmethod def gen_archive_header_buffer(cls, name: str, toc_size: int, data_offset: int, toc_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, magic: bytes = b"_ARCHIVE") -> bytes: @@ -207,21 +207,21 @@ def gen_archive_header_buffer(cls, name: str, toc_size: int, data_offset: int, t return magic + version + csums[0] + encoded_name + csums[1] + encoded_toc_size + encoded_data_offset + encoded_toc_offset + uint(1) + uint(0) + cls.ARCHIVE_HEADER_UNK @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> DowIIVirtualDriveHeader: - return DowIIVirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v5.VirtualDriveHeader: + return v5.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) gen_vdrive_header_buffer = DowI.gen_vdrive_header_buffer # Same exact layout; @staticmethod - def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> DowIIFolderHeader: - return DowIIFolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v5.FolderHeader: + return v5.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) gen_folder_header_buffer = DowI.gen_folder_header_buffer # Same exact layout; @staticmethod - def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, 
comp_size: int = None) -> DowIIFileHeader: + def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> v5.FileHeader: comp_size = decomp_size if comp_size is None else comp_size - return DowIIFileHeader(Ptr(name_offset), Ptr(data_offset, comp_size), decomp_size, comp_size, 0, 0) + return v5.FileHeader(Ptr(name_offset), Ptr(data_offset, comp_size), decomp_size, comp_size, 0, 0) @staticmethod def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> bytes: @@ -262,7 +262,7 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi return cls.gen_archive_buffer(archive_name, toc_ptr_buf, toc_buf, file_uncomp_data, magic) @classmethod - def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> DowIIArchive: + def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> v5.Archive: def dirty_toc_hack(): name_buf, name_offsets = cls.gen_name_buffer(folder, file) vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) @@ -293,13 +293,13 @@ def gen_csum(buffer: bytes, eigen: bytes) -> bytes: vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = File(file_h, file, file_uncomp_data, True) - folder_ = Folder(folder_h, folder, [], [file_]) - vdrive_ = VirtualDrive(vdrive_h, [folder_], [file_]) - folder_._drive = file_._drive = vdrive_ - file_._parent = folder_ + file_ = FileABC(file_h, file, file_uncomp_data, True) + folder_ = FolderABC(folder_h, folder, [], [file_]) + vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) + folder_.parent_drive = file_.parent_drive = vdrive_ + file_.parent_folder = folder_ header = cls.gen_archive_header(archive_name, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), cls.ARCHIVE_HEADER_SIZE, csums) - return DowIIArchive(header, [vdrive_], False) + return v5.Archive(header, [vdrive_], False) class DowIII: @@ -309,7 +309,7 @@ class DowIII: @classmethod def gen_archive_header(cls, name: str, toc_offset: int, toc_size: int, data_offset: int, data_size: int) -> ArchiveHeader: - return DowIIIArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset, data_size), cls.ARCHIVE_HEADER_UNK) + return v9.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset, data_size), cls.ARCHIVE_HEADER_UNK) @classmethod def gen_archive_header_buffer(cls, name: str, toc_offset: int, toc_size: int, data_offset: int, data_size: int, magic: bytes = b"_ARCHIVE") -> bytes: @@ -322,26 +322,26 @@ def gen_archive_header_buffer(cls, name: str, toc_offset: int, toc_size: int, da return magic + version + encoded_name + encoded_toc_offset + encoded_toc_size + encoded_data_offset + encoded_data_size + uint(0) + uint(1) + uint(0) + cls.ARCHIVE_HEADER_UNK @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> DowIIIVirtualDriveHeader: - return DowIIIVirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, 
file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v9.VirtualDriveHeader: + return v9.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) @staticmethod def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK): return encode_and_pad(path, 64, "ascii") + encode_and_pad(name, 64, "ascii") + uint(subfolder_offset) + uint(subfolder_offset + subfolder_count) + uint(file_offset) + uint(file_count + file_offset) + unk @staticmethod - def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> DowIIIFolderHeader: - return DowIIIFolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v9.FolderHeader: + return v9.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) @staticmethod def gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> bytes: return uint(name_offset) + uint(subfolder_offset) + uint(subfolder_offset + subfolder_count) + uint(file_offset) + uint(file_count + file_offset) @staticmethod - def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> DowIIIFileHeader: + def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> v9.FileHeader: if comp_size is None: comp_size = decomp_size - return DowIIIFileHeader(Ptr(name_offset), Ptr(data_offset), decomp_size, comp_size, 0, 0, 0, 0, 0) + return v9.FileHeader(Ptr(name_offset), Ptr(data_offset), decomp_size, comp_size, 0, 0, 0, 0, 0) @staticmethod def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> bytes: @@ -368,7 +368,7 @@ def gen_toc_ptr_buffer(vdrive: Tuple[int, int], folders: Tuple[int, int], files: return b"".join(parts) @staticmethod - def gen_toc(vdrive: VirtualDrive, folder: Folder, file: File, names: Dict[int, str]) -> ArchiveTOC: + def gen_toc(vdrive: VirtualDriveABC, folder: FolderABC, file: FileABC, names: Dict[int, str]) -> ArchiveTOC: return ArchiveTOC([vdrive], [folder], [file], names) @classmethod @@ -391,16 +391,16 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi return cls.gen_archive_buffer(archive_name, toc_ptr_buf, toc_buf, file_uncomp_data, magic) @classmethod - def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes) -> DowIIIArchive: + def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes) -> v9.Archive: name_buf, name_offsets = cls.gen_name_buffer(folder, file) vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = File(file_h, file, file_uncomp_data, True) - folder_ = Folder(folder_h, folder, [], [file_]) - vdrive_ = VirtualDrive(vdrive_h, [folder_], [file_]) - folder_._drive = 
file_._drive = vdrive_ - file_._parent = folder_ + file_ = FileABC(file_h, file, file_uncomp_data, True) + folder_ = FolderABC(folder_h, folder, [], [file_]) + vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) + folder_.parent_drive = file_.parent_drive = vdrive_ + file_.parent_folder = folder_ vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) @@ -412,4 +412,4 @@ def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_unco full_toc = toc_ptr_buf + toc_buf header = cls.gen_archive_header(archive_name, cls.ARCHIVE_HEADER_SIZE, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), len(file_uncomp_data)) - return DowIIIArchive(header, [vdrive_], False) + return v9.Archive(header, [vdrive_], False) diff --git a/tests/relic/sga/file/test_file_header.py b/tests/relic/sga/file/test_file_header.py index 3d4032e..d2e68c4 100644 --- a/tests/relic/sga/file/test_file_header.py +++ b/tests/relic/sga/file/test_file_header.py @@ -4,13 +4,14 @@ import pytest from relic.common import VersionLike -from relic.sga import FileHeader, ArchiveVersion +from relic.sga.common import ArchiveVersion +from relic.sga.abc_ import FileHeaderABC from tests.relic.sga.datagen import DowI, DowII, DowIII class FileHeaderTests: @abstractmethod - def test_pack(self, header: FileHeader, expected: bytes): + def test_pack(self, header: FileHeaderABC, expected: bytes): with BytesIO() as stream: written = header.pack(stream) assert written == len(expected) @@ -18,23 +19,23 @@ def test_pack(self, header: FileHeader, expected: bytes): assert stream.read() == expected @abstractmethod - def test_inner_pack(self, header: FileHeader, expected: bytes): + def test_inner_pack(self, header: FileHeaderABC, expected: bytes): with BytesIO() as stream: - written = header._pack(stream) + written = header.pack(stream) assert written == len(expected) stream.seek(0) assert stream.read() == expected @abstractmethod - def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): + def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): with BytesIO(data_stream) as stream: - header = expected.__class__._unpack(stream) + header = expected.__class__.old_unpack(stream) assert header == expected @abstractmethod - def test_unpack(self, data_stream: bytes, expected: FileHeader, version: VersionLike): + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): with BytesIO(data_stream) as stream: - header = FileHeader.unpack(stream, version) + header = FileHeaderABC.old_unpack(stream, version) assert header == expected @@ -43,19 +44,19 @@ def test_unpack(self, data_stream: bytes, expected: FileHeader, version: Version class TestDowIFileHeader(FileHeaderTests): @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_pack(self, header: FileHeader, expected: bytes): + def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeader, expected: bytes): + def test_inner_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW1_HEADER, DOW1_HEADER_BUFFER, ArchiveVersion.Dow)]) - def test_unpack(self, data_stream: bytes, expected: FileHeader, version: VersionLike): + def test_unpack(self, 
data_stream: bytes, expected: FileHeaderABC, version: VersionLike): super().test_unpack(data_stream, expected, version) @pytest.mark.parametrize(["expected", "data_stream"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): + def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): super().test_inner_unpack(data_stream, expected) @@ -64,19 +65,19 @@ def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): class TestDowIIFileHeader(FileHeaderTests): @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_pack(self, header: FileHeader, expected: bytes): + def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeader, expected: bytes): + def test_inner_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW2_HEADER, DOW2_HEADER_BUFFER, ArchiveVersion.Dow2)]) - def test_unpack(self, data_stream: bytes, expected: FileHeader, version: VersionLike): + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): super().test_unpack(data_stream, expected, version) @pytest.mark.parametrize(["expected", "data_stream"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): + def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): super().test_inner_unpack(data_stream, expected) @@ -85,17 +86,17 @@ def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): class TestDowIIIFileHeader(FileHeaderTests): @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_pack(self, header: FileHeader, expected: bytes): + def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeader, expected: bytes): + def test_inner_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW3_HEADER, DOW3_HEADER_BUFFER, ArchiveVersion.Dow3)]) - def test_unpack(self, data_stream: bytes, expected: FileHeader, version: VersionLike): + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): super().test_unpack(data_stream, expected, version) @pytest.mark.parametrize(["expected", "data_stream"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeader): + def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): super().test_inner_unpack(data_stream, expected) diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py new file mode 100644 index 0000000..04496ed --- /dev/null +++ b/tests/relic/sga/test_vX_interface.py @@ -0,0 +1,40 @@ +from typing import Iterable, List, Tuple + +import relic.sga +from relic.sga import v2, v5, v9, vX +import pytest + +MODULES = [v2, v5, v9] +ATTRS = vX.required_attrs +APIS = relic.sga.APIS.values() + + +def _permutate(*items: List): + def inner_permutate(subset: List, remaining: Tuple[List]) -> Iterable: + for item in subset: + if len(remaining) > 1: + for sub_items in 
inner_permutate(remaining[0], remaining[1:]): + yield item, *sub_items # Not possible in 3.7 and below, but we target 3.9+ + else: + for sub_item in remaining[0]: + yield item, sub_item + + if len(items) == 0: + return items[0] + else: + return inner_permutate(items[0], items[1:]) + + +@pytest.mark.parametrize(["module"], [(m,) for m in MODULES]) +def test_module_is_vX_api(module): + assert vX.is_module_api(module) + + +@pytest.mark.parametrize(["module", "attr"], _permutate(MODULES, ATTRS)) +def test_module_has_required_vX_attr(module, attr: str): + assert hasattr(module, attr) + + +@pytest.mark.parametrize(["api", "attr"], _permutate(APIS, ATTRS)) +def test_api_has_required_vX_attr(api, attr: str): + assert hasattr(api, attr) From 945831f984f696a35f910d36dfd5590eb089b152 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Wed, 8 Jun 2022 16:26:23 -0800 Subject: [PATCH 02/19] pytests refactored to match the version refactoring Tests pass, but the other archive tests should be PR'd first, then cherry-picked and merged into this one and fixed for the new API --- src/relic/sga/abc_.py | 48 ++++--- src/relic/sga/common.py | 38 +++++- src/relic/sga/io.py | 35 +---- src/relic/sga/protocols.py | 20 ++- src/relic/sga/v5.py | 4 +- tests/relic/sga/archive/test_archive.py | 66 ++------- .../relic/sga/archive/test_archive_header.py | 129 ++++-------------- tests/relic/sga/file/test_file_header.py | 55 ++------ tests/relic/sga/test_vX_interface.py | 2 + 9 files changed, 137 insertions(+), 260 deletions(-) diff --git a/src/relic/sga/abc_.py b/src/relic/sga/abc_.py index f1ce99d..afba742 100644 --- a/src/relic/sga/abc_.py +++ b/src/relic/sga/abc_.py @@ -12,18 +12,14 @@ # import relic.sga.io from relic.common import VersionLike -from relic.sga.common import ArchiveRange, ArchiveVersion +from relic.sga.common import ArchiveRange, ArchiveVersion, walk # from relic.sga.io import walk -from relic.sga.protocols import ArchiveHeader, Archive, FileCollection, FolderCollection, Folder, File, VirtualDrive, ArchiveWalk +from relic.sga.protocols import ArchiveHeader, Archive, FileCollection, FolderCollection, Folder, File, VirtualDrive, ArchiveWalk, DriveCollection, DriveChild, FolderChild, ArchiveWalkable _NULL = b"\0" _BUFFER_SIZE = 64 * KiB -def walk(self): - raise NotImplementedError # Currently causes cyclic dependencies; needs a fix - - @dataclass class ArchiveTableOfContentsABC: drives: List[VirtualDriveABC] @@ -219,6 +215,12 @@ class ArchiveHeaderABC(ArchiveHeader, ABC): class ArchiveABC(Archive): header: ArchiveHeader """Sparse represents whether data was loaded on creation.""" + + def walk(self) -> ArchiveWalk: + for drive in self.drives: + for inner_walk in drive.walk(): + yield inner_walk + _sparse: bool def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse: bool): @@ -226,9 +228,6 @@ def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse self._sparse = _sparse self.drives = drives - def walk(self) -> ArchiveWalk: - return walk(self) - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveTableOfContentsPtrABC TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeadersABC TOC_CLS: ClassVar[Type[ArchiveTableOfContentsABC]] = ArchiveTableOfContentsABC @@ -372,17 +371,29 @@ class FileCollectionABC(FileCollection): @dataclass -class FolderChild: +class FolderChildABC(FolderChild): parent_folder: Optional[Folder] @dataclass -class DriveChild: +class DriveChildABC(DriveChild): parent_drive: Optional[VirtualDrive] @dataclass -class 
FolderABC(Folder, FolderCollectionABC, FileCollectionABC, FolderChild, DriveChild): +class DriveCollectionABC(DriveCollection): + drives: List[VirtualDrive] + + +@dataclass +class FolderABC(Folder, FolderCollectionABC, FileCollectionABC, FolderChildABC, DriveChildABC): + def walk(self) -> ArchiveWalk: + drive = self.parent_drive + yield drive, self, self.sub_folders, self.files + for folder in self.sub_folders: + for inner_walk in folder.walk(): + yield inner_walk + header: FolderHeaderABC name: str @@ -401,9 +412,6 @@ def full_path(self) -> PurePosixPath: else: return PurePosixPath(self.name) - def walk(self) -> ArchiveWalk: - return walk(self) - @classmethod def create(cls, header: FolderHeaderABC) -> FolderABC: name = None @@ -519,7 +527,10 @@ def name(self) -> str: return self.header.name def walk(self) -> ArchiveWalk: - return walk(self) + yield self, None, self.sub_folders, self.files + for folder in self.sub_folders: + for inner_walk in folder.walk(): + yield inner_walk @property def full_path(self) -> PurePosixPath: @@ -557,8 +568,3 @@ def build_tree(self): ArchiveTOC = ArchiveTableOfContentsABC ArchiveToCPtrABC = ArchiveTableOfContentsPtrABC - - -@dataclass -class DriveCollection: - drives: List[VirtualDrive] diff --git a/src/relic/sga/common.py b/src/relic/sga/common.py index e9b1fb8..1b6b2d6 100644 --- a/src/relic/sga/common.py +++ b/src/relic/sga/common.py @@ -1,12 +1,13 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, Iterator, BinaryIO +from typing import Optional, Iterator, BinaryIO, Union from serialization_tools.magic import MagicWordIO from serialization_tools.structx import Struct from relic.common import VersionEnum, Version, VersionLike +from relic.sga.protocols import ArchiveWalk, FileCollection, FolderCollection, DriveCollection, Folder, VirtualDrive ArchiveVersionLayout = Struct("< 2H") @@ -57,4 +58,37 @@ def __next__(self) -> int: return next(self.__iterable) -ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) \ No newline at end of file +ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) + + + +def walk(collection: Union[DriveCollection, FolderCollection, FileCollection]) -> ArchiveWalk: + raise TypeError("Use the .walk() method on the collection!") + # drives = collection.drives if isinstance(collection, DriveCollection) else [] + # sub_folders = collection.sub_folders if isinstance(collection, FolderCollection) else [] + # files = collection.files if isinstance(collection, FileCollection) and not isinstance(collection, VirtualDrive) else [] + # + # root_drive = collection if isinstance(collection, VirtualDrive) else None + # root_folder = collection if isinstance(collection, Folder) else None + # + # # TODO optimize + # # logically, we can only walk folder OR drive + # if root_drive is None and root_folder is None and len(sub_folders) == 0 and len(files) == 0: + # # I don't think we need to return ANYTHING if we won't be iterating over it + # pass + # # if len(drives) == 0: # We will only yield this item, so we return this to always iterate over something + # # yield root_drive, root_folder, sub_folders, files + # else: + # yield root_drive, root_folder, sub_folders, files # at least one of these isn't None/Empty so we yield it + # + # for drive in drives: + # for d, f, folds, files, in walk(drive): + # d = d or drive or root_drive + # f = f or root_folder + # yield d, f, folds, files + # + # for folder in sub_folders: + # for d, f, folds, files in walk(folder): + # d 
= d or root_drive + # f = f or folder or root_folder + # yield d, f, folds, files \ No newline at end of file diff --git a/src/relic/sga/io.py b/src/relic/sga/io.py index 558ade1..7c4632d 100644 --- a/src/relic/sga/io.py +++ b/src/relic/sga/io.py @@ -1,12 +1,12 @@ from __future__ import annotations -from typing import Dict, Type, BinaryIO, Union, Any +from typing import Dict, Type, BinaryIO from relic.common import VersionLike from relic.sga.vX import APIvX -from relic.sga.abc_ import DriveCollection, FolderCollectionABC, FileCollectionABC, ArchiveABC +from relic.sga.abc_ import ArchiveABC from relic.sga.common import ArchiveMagicWord, ArchiveVersion -from relic.sga.protocols import ArchiveHeader, Archive, ArchiveWalk, VirtualDrive, Folder +from relic.sga.protocols import ArchiveHeader, Archive def unpack_archive_header(versions: Dict[VersionLike, Type[ArchiveHeader]], stream: BinaryIO, read_magic: bool = True) -> ArchiveHeader: @@ -47,32 +47,3 @@ def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Dict[Version return api.Archive.unpack(stream, header, sparse) # Defer to subclass (ensures packing works as expected) -def walk(collection: Union[DriveCollection, FolderCollectionABC, FileCollectionABC]) -> ArchiveWalk: - drives = collection.drives if isinstance(collection, DriveCollection) else [] - sub_folders = collection.sub_folders if isinstance(collection, FolderCollectionABC) else [] - files = collection.files if isinstance(collection, FileCollectionABC) and not isinstance(collection, VirtualDrive) else [] - - root_drive = collection if isinstance(collection, VirtualDrive) else None - root_folder = collection if isinstance(collection, Folder) else None - - # TODO optimize - # logically, we can only walk folder OR drive - if root_drive is None and root_folder is None and len(sub_folders) == 0 and len(files) == 0: - # I don't think we need to return ANYTHING if we won't be iterating over it - pass - # if len(drives) == 0: # We will only yield this item, so we return this to always iterate over something - # yield root_drive, root_folder, sub_folders, files - else: - yield root_drive, root_folder, sub_folders, files # at least one of these isn't None/Empty so we yield iti - - for drive in drives: - for d, f, folds, files, in walk(drive): - d = d or drive or root_drive - f = f or root_folder - yield d, f, folds, files - - for folder in sub_folders: - for d, f, folds, files in walk(folder): - d = d or root_drive - f = f or folder or root_folder - yield d, f, folds, files diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py index a228d1f..8782c7a 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga/protocols.py @@ -41,39 +41,53 @@ def pack(self, stream: BinaryIO) -> int: raise NotImplementedError +@runtime_checkable +class ArchiveWalkable(Protocol): + def walk(self) -> ArchiveWalk: + raise NotImplementedError + +@runtime_checkable class DriveCollection(Protocol): drives: List[VirtualDrive] +@runtime_checkable class FolderCollection(Protocol): sub_folders: List[Folder] +@runtime_checkable class FileCollection(Protocol): files: List[File] +@runtime_checkable class FolderChild(Protocol): parent_folder: Optional[Folder] +@runtime_checkable class DriveChild(Protocol): parent_drive: Optional[VirtualDrive] -class VirtualDrive(FolderCollection, FileCollection, Protocol): +@runtime_checkable +class VirtualDrive(FolderCollection, FileCollection,ArchiveWalkable, Protocol): ... 
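# An aside on the @runtime_checkable markers being added above: isinstance()
# against such a Protocol tests member *presence*, not inheritance, so any
# object exposing the right methods matches structurally. The sketch below is
# illustrative only -- the _Walkable and _SketchDrive names are hypothetical,
# not part of this API -- and it runs on its own:
from typing import List, Protocol, runtime_checkable


@runtime_checkable
class _Walkable(Protocol):
    def walk(self): ...


class _SketchDrive:  # note: no Protocol base class anywhere
    def __init__(self) -> None:
        self.sub_folders: List = []
        self.files: List = []

    def walk(self):
        # Mirrors the ArchiveWalk tuple shape: (drive, folder, sub_folders, files)
        yield self, None, self.sub_folders, self.files


assert isinstance(_SketchDrive(), _Walkable)  # passes purely by structure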
-class Folder(FolderCollection, FileCollection, FolderChild, DriveChild, Protocol): +@runtime_checkable +class Folder(FolderCollection, FileCollection, FolderChild, DriveChild,ArchiveWalkable, Protocol): ... +@runtime_checkable class File(FolderChild, DriveChild, Protocol): ... -class Archive(DriveCollection, Protocol): +@runtime_checkable +class Archive(DriveCollection,ArchiveWalkable, Protocol): header: ArchiveHeader """Sparse represents whether data was loaded on creation.""" _sparse: bool diff --git a/src/relic/sga/v5.py b/src/relic/sga/v5.py index 4efc36d..6485be6 100644 --- a/src/relic/sga/v5.py +++ b/src/relic/sga/v5.py @@ -98,7 +98,7 @@ def version(self) -> VersionLike: return ArchiveVersion.Dow2 @classmethod - def _unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': + def unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream) assert rsv_1 == 1 @@ -110,7 +110,7 @@ def _unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk) - def _pack(self, stream: BinaryIO) -> int: + def pack(self, stream: BinaryIO) -> int: args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk return self.LAYOUT.pack_stream(stream, *args) diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py index 529105c..24ae3a9 100644 --- a/tests/relic/sga/archive/test_archive.py +++ b/tests/relic/sga/archive/test_archive.py @@ -3,13 +3,20 @@ import pytest +from relic.sga import protocols from relic.sga.abc_ import ArchiveABC -from relic.sga.common import ArchiveMagicWord from relic.sga.protocols import ArchiveWalk from tests.helpers import TF from tests.relic.sga.datagen import DowII, DowI, DowIII +def _ARCHIVE_WALK_SAMPLE(a:protocols.Archive) -> ArchiveWalk: + d = a.drives[0] + sfs = d.sub_folders + dfs = d.files + yield d, None, sfs, dfs + yield d, sfs[0], [], sfs[0].files + class ArchiveTests: def assert_equal(self, expected: ArchiveABC, result: ArchiveABC, sparse: bool): assert expected.header == result.header @@ -34,17 +41,6 @@ def test_unpack(self, stream_data: bytes, expected: ArchiveABC): assert expected.__class__ == archive.__class__ self.assert_equal(expected, archive, sparse) - @abstractmethod - def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): - for read_magic in TF: - for sparse in TF: - for validate in ([False] if not valid_checksums else TF): - with BytesIO(stream_data) as stream: - if not read_magic: - stream.seek(ArchiveMagicWord.layout.size) - archive = ArchiveABC.unpack(stream, read_magic, sparse, validate=validate) - assert expected.__class__ == archive.__class__ - self.assert_equal(expected, archive, sparse) @abstractmethod def test_pack(self, archive: ArchiveABC, expected: bytes): @@ -65,32 +61,21 @@ def fast_gen_dow1_archive(*args): DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") -def DOW1_ARCHIVE_WALK() -> ArchiveWalk: - a = DOW1_ARCHIVE - d = a.drives[0] - sfs = d.sub_folders - yield d, None, sfs, [] - yield d, sfs[0], [], sfs[0].files -class TestDowIArchive(ArchiveTests): +class TestArchiveV2(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) def test_unpack(self, stream_data: bytes, 
expected: ArchiveABC): super().test_unpack(stream_data, expected) - # @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], - # [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE, True)]) - # def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): - # super().old_test_unpack(stream_data, expected, valid_checksums) - @pytest.mark.parametrize(["archive", "expected"], [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], - [(DOW1_ARCHIVE, DOW1_ARCHIVE_WALK())]) + [(DOW1_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW1_ARCHIVE))]) def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -102,32 +87,20 @@ def fast_gen_dow2_archive(*args): DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") -def DOW2_ARCHIVE_WALK() -> ArchiveWalk: - a = DOW2_ARCHIVE - d = a.drives[0] - sfs = d.sub_folders - yield d, None, sfs, [] - yield d, sfs[0], [], sfs[0].files - -class TestDowIIArchive(ArchiveTests): +class TestArchiveV5(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) def test_unpack(self, stream_data: bytes, expected: ArchiveABC): super().test_unpack(stream_data, expected) - @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], - [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE, True)]) - def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): - super().old_test_unpack(stream_data, expected, valid_checksums) - @pytest.mark.parametrize(["archive", "expected"], [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], - [(DOW2_ARCHIVE, DOW2_ARCHIVE_WALK())]) + [(DOW2_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW2_ARCHIVE))]) def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -139,31 +112,20 @@ def fast_gen_dow3_archive(*args): DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED = fast_gen_dow3_archive("Dow3 Test Archive", "Tests", "Some Witty FileName.txt", b"NGL; I'm running out of dumb/clever test data.") -def DOW3_ARCHIVE_WALK() -> ArchiveWalk: - a = DOW3_ARCHIVE - d = a.drives[0] - sfs = d.sub_folders - yield d, None, sfs, [] - yield d, sfs[0], [], sfs[0].files -class TestDowIIIArchive(ArchiveTests): +class TestArchiveV9(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) def test_unpack(self, stream_data: bytes, expected: ArchiveABC): super().test_unpack(stream_data, expected) - @pytest.mark.parametrize(["stream_data", "expected", "valid_checksums"], - [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE, True)]) - def old_test_unpack(self, stream_data: bytes, expected: ArchiveABC, valid_checksums: bool): - super().old_test_unpack(stream_data, expected, valid_checksums) - @pytest.mark.parametrize(["archive", "expected"], [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) def test_pack(self, archive: ArchiveABC, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], - [(DOW3_ARCHIVE, DOW3_ARCHIVE_WALK())]) + [(DOW3_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW3_ARCHIVE))]) def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): super().test_walk(archive, expected) diff --git 
a/tests/relic/sga/archive/test_archive_header.py b/tests/relic/sga/archive/test_archive_header.py index 7520ec3..6e6d530 100644 --- a/tests/relic/sga/archive/test_archive_header.py +++ b/tests/relic/sga/archive/test_archive_header.py @@ -6,14 +6,12 @@ from serialization_tools.ioutil import WindowPtr, Ptr from serialization_tools.size import KiB, MiB, GiB -import relic.sga.io from relic.common import Version -from relic.sga.common import ArchiveMagicWord, ArchiveVersion from relic.sga import protocols as proto, v2, v5, v9 from relic.sga.checksums import gen_md5_checksum, validate_md5_checksum -from relic.sga.v9 import ArchiveHeader -from tests.relic.sga.datagen import DowI, DowII, DowIII +from relic.sga.common import ArchiveVersion from tests.helpers import TF +from tests.relic.sga.datagen import DowI, DowII, DowIII class ArchiveHeaderTests: @@ -22,45 +20,14 @@ def test_validate_checksums(self, archive: bytes, cls: Type[proto.ArchiveHeader] for fast in TF: for _assert in TF: with BytesIO(archive) as stream: + stream.seek(12) # skip magic/version archive_header = cls.unpack(stream) archive_header.validate_checksums(stream, fast=fast, _assert=_assert) @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_version(self, archive: ArchiveHeader, expected: Version): + def test_version(self, archive: proto.ArchiveHeader, expected: Version): assert archive.version == expected - # @abstractmethod # Trick PyCharm into requiring us to redefine this - def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - with BytesIO(buffer) as stream: - result = relic.sga.io.unpack_archive(stream) - assert result == expected - - # @abstractmethod # Trick PyCharm into requiring us to redefine this - def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): - with BytesIO() as stream: - inst.pack(stream) - stream.seek(0) - result = stream.read() - assert result == expected - - # @abstractmethod # Trick PyCharm into requiring us to redefine this - def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - for read_magic in TF: - with BytesIO(buffer) as stream: - if not read_magic: - ArchiveMagicWord.read_magic_word(stream) # read past magic - - try: - unpacked = ArchiveHeader.unpack(stream, read_magic) - except AssertionError as e: - if read_magic and bad_magic_word: - return # Test passed - else: - raise e - else: - assert expected.__class__ == unpacked.__class__ - assert expected == unpacked - @abstractmethod def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): with BytesIO(buffer) as stream: @@ -73,23 +40,9 @@ def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): written = inst.pack(stream) stream.seek(0) packed = stream.read() + assert len(packed) == written assert expected == packed - @abstractmethod # Trick PyCharm into requiring us to redefine this - def old_test_pack(self, inst: proto.ArchiveHeader, expected: bytes): - magic_size = ArchiveMagicWord.layout.size - for write_magic in TF: - with BytesIO() as stream: - written = inst.pack(stream, write_magic) - # assert len(expected) == written - (0 if write_magic else magic_size) - if not write_magic: - true_expected = expected[magic_size:] - else: - true_expected = expected - stream.seek(0) - result = stream.read() - assert true_expected == result - _KNOWN_EIGEN = b'06BEF126-4E3C-48D3-8D2E-430BF125B54F' _KNOWN_DATA = 
b'\xf3\x0cGjx:"\xb7O\x89\xc1\x82H\xb2\xa1\xaa\x82-\xe4\\{\xe2\x905\x0c\xdbT\x0c\x82\xa3y\xdat\xd5\xdf\xb7\x04\x1e\xd0\xaa\xf6\xc9|U%\xf7\x0c\xb9\x92\xc9\xbf\xa9\xa3\xaaQ]\xb6\x8c\x10\x87\xc3r\xe3\x89\x16T\x936\xc5l/(\xbd\xbc\x08\xa2\x9b`|\xec\xd5\xf3\xfd\x83\x85\xadHY\xf4U\xb8\x85\x92\xcd\x1d\xc1\xa2\x0f\xbam!\xd5\xacnft>\'\xf0\x12\x9c\x0c\x1c{\xa2\x15VI\xb0\x13\x89\xde\x889\xdc\x15_\xc8\\\x97\x06\xa7\xde\xc0p\xf9o\t\xd3_\x9d\xa7@.\x81\xed\xdd\x13\x9b m9\xf5\x1bV\xc3\xe0\xd4@\x99\xa2\x8aGr\x04\xff\x05\xedIs\x15\t0\x98G\x87O\x9c\xa1\xd2\tcS\xb3\x1eI\xf5\xe3Qp\xe0\xd0m\xbf;\xfb\x856\xa7\\\xb8\xad\x19\xc1\xa3\xaf+\xd4\x08\xd5Y4\x87p|p`dQ\x1c|>is\x17;\xa6\x8d\xa2\xa4\xdc\xe0\xd6\xaf\xc3\x93\xf59\x9a[\x19J\xc88\xb8\xfd/\xe4\xc6J\x8c\xddCY&\x8f' @@ -153,7 +106,7 @@ def fast_dow1_archive_header(name, toc_pos, bad_magic: bytes): class TestDowIArchiveHeader(ArchiveHeaderTests): @pytest.mark.parametrize( ["archive", "cls"], - [(DOW1_ARCHIVE_BUFFER[HDR_START:], v2.ArchiveHeader)]) + [(DOW1_ARCHIVE_BUFFER, v2.ArchiveHeader)]) def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): super().test_validate_checksums(archive, cls) @@ -161,61 +114,46 @@ def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): ["expected", "inst"], [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] ) - def test_pack(self, inst: ArchiveHeader, expected: bytes): + def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): super().test_pack(inst, expected) @pytest.mark.parametrize( ["buffer", "expected"], [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] ) - def test_unpack(self, buffer: bytes, expected: ArchiveHeader): + def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): super().test_unpack(buffer, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, ArchiveVersion.Dow)]) - def test_version(self, archive: ArchiveHeader, expected: Version): + def test_version(self, archive: proto.ArchiveHeader, expected: Version): super().test_version(archive, expected) # Not garunteed to be a valid header -def fast_dow2_archive_header(name, bad_magic: bytes): +def fast_dow2_archive_header(name): _ABC = 0, 0, 0 - return DowII.gen_archive_header(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC, magic=bad_magic) + return DowII.gen_archive_header(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC) -DOW2_HEADER, DOW2_HEADER_DATA, DOW2_HEADER_DATA_BAD_MAGIC = fast_dow2_archive_header("Dawn Of War 2 Test Header", b"Garbage!") +DOW2_HEADER, DOW2_HEADER_DATA = fast_dow2_archive_header("Dawn Of War 2 Test Header") DOW2_ARCHIVE_BUFFER = DowII.gen_sample_archive_buffer("Dawn Of War 2 Test Archive", "Dow2 Tests", "Imperial Propoganda.txt", b"By the Emperor, we're ready to unleash eleven barrels, m' lord, sir!") class TestDowIIArchiveHeader(ArchiveHeaderTests): - @pytest.mark.parametrize( - ["expected", "inst"], - [(DOW2_HEADER_DATA[12:], DOW2_HEADER)], - ) - def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): - super().old_test_private_pack(inst, expected) - @pytest.mark.parametrize( ["buffer", "expected"], - [(DOW2_HEADER_DATA[12:], DOW2_HEADER)], + [(DOW2_HEADER_DATA[HDR_START:], DOW2_HEADER)], ) - def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - super().old_test_private_unpack(buffer, expected) - - @pytest.mark.parametrize( - ["buffer", "expected", "bad_magic_word"], - [(DOW2_HEADER_DATA, DOW2_HEADER, False), - (DOW2_HEADER_DATA_BAD_MAGIC, 
DOW2_HEADER, True)], - ) - def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - super().old_test_unpack(buffer, expected, bad_magic_word) + def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): + super().test_unpack(buffer, expected) @pytest.mark.parametrize( ["inst", "expected"], - [(DOW2_HEADER, DOW2_HEADER_DATA)]) - def old_test_pack(self, inst: ArchiveHeader, expected: bytes): - super().old_test_pack(inst, expected) + [(DOW2_HEADER, DOW2_HEADER_DATA[HDR_START:])]) + def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): + super().test_pack(inst, expected) @pytest.mark.parametrize( ["archive", "cls"], @@ -225,7 +163,7 @@ def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]): super().test_validate_checksums(archive, cls) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, ArchiveVersion.Dow2)]) - def test_version(self, archive: ArchiveHeader, expected: Version): + def test_version(self, archive: proto.ArchiveHeader, expected: Version): super().test_version(archive, expected) @@ -247,34 +185,19 @@ def test_validate_checksums(self, archive: bytes, cls: Type[v9.ArchiveHeader]): # HACK but if it fails it means logic has changed assert cls.validate_checksums(None, None, fast=fast, _assert=_assert) - @pytest.mark.parametrize( - ["expected", "inst"], - [(DOW3_HEADER_DATA[12:], DOW3_HEADER)], - ) - def old_test_private_pack(self, inst: ArchiveHeader, expected: bytes): - super().old_test_private_pack(inst, expected) - @pytest.mark.parametrize( ["buffer", "expected"], - [(DOW3_HEADER_DATA[12:], DOW3_HEADER)], - ) - def old_test_private_unpack(self, buffer: bytes, expected: ArchiveHeader): - super().old_test_private_unpack(buffer, expected) - - @pytest.mark.parametrize( - ["buffer", "expected", "bad_magic_word"], - [(DOW3_HEADER_DATA, DOW3_HEADER, False), - (DOW3_HEADER_DATA_BAD_MAGIC, DOW3_HEADER, True)], + [(DOW3_HEADER_DATA[HDR_START:], DOW3_HEADER)], ) - def old_test_unpack(self, buffer: bytes, expected: ArchiveHeader, bad_magic_word: bool): - super().old_test_unpack(buffer, expected, bad_magic_word) + def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): + super().test_unpack(buffer, expected) @pytest.mark.parametrize( ["inst", "expected"], - [(DOW3_HEADER, DOW3_HEADER_DATA)]) - def old_test_pack(self, inst: ArchiveHeader, expected: bytes): - super().old_test_pack(inst, expected) + [(DOW3_HEADER, DOW3_HEADER_DATA[HDR_START:])]) + def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): + super().test_pack(inst, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, ArchiveVersion.Dow3)]) - def test_version(self, archive: ArchiveHeader, expected: Version): + def test_version(self, archive: proto.ArchiveHeader, expected: Version): super().test_version(archive, expected) diff --git a/tests/relic/sga/file/test_file_header.py b/tests/relic/sga/file/test_file_header.py index d2e68c4..ebf800f 100644 --- a/tests/relic/sga/file/test_file_header.py +++ b/tests/relic/sga/file/test_file_header.py @@ -19,23 +19,9 @@ def test_pack(self, header: FileHeaderABC, expected: bytes): assert stream.read() == expected @abstractmethod - def test_inner_pack(self, header: FileHeaderABC, expected: bytes): - with BytesIO() as stream: - written = header.pack(stream) - assert written == len(expected) - stream.seek(0) - assert stream.read() == expected - - @abstractmethod - def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): - with BytesIO(data_stream) as 
stream: - header = expected.__class__.old_unpack(stream) - assert header == expected - - @abstractmethod - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): with BytesIO(data_stream) as stream: - header = FileHeaderABC.old_unpack(stream, version) + header = expected.__class__.unpack(stream) assert header == expected @@ -47,17 +33,11 @@ class TestDowIFileHeader(FileHeaderTests): def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) - @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW1_HEADER, DOW1_HEADER_BUFFER, ArchiveVersion.Dow)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): - super().test_unpack(data_stream, expected, version) @pytest.mark.parametrize(["expected", "data_stream"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): - super().test_inner_unpack(data_stream, expected) + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): + super().test_unpack(data_stream, expected) + DOW2_HEADER, DOW2_HEADER_BUFFER = DowII.gen_file_header(0, 0, 0), DowII.gen_file_header_buffer(0, 0, 0) @@ -68,17 +48,10 @@ class TestDowIIFileHeader(FileHeaderTests): def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) - @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW2_HEADER, DOW2_HEADER_BUFFER, ArchiveVersion.Dow2)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): - super().test_unpack(data_stream, expected, version) - @pytest.mark.parametrize(["expected", "data_stream"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): - super().test_inner_unpack(data_stream, expected) + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): + super().test_unpack(data_stream, expected) + DOW3_HEADER, DOW3_HEADER_BUFFER = DowIII.gen_file_header(0x0f, 0xf0, 0x09, 0x90), DowIII.gen_file_header_buffer(0x0f, 0xf0, 0x09, 0x90) @@ -89,14 +62,6 @@ class TestDowIIIFileHeader(FileHeaderTests): def test_pack(self, header: FileHeaderABC, expected: bytes): super().test_pack(header, expected) - @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_inner_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - @pytest.mark.parametrize(["expected", "data_stream", "version"], [(DOW3_HEADER, DOW3_HEADER_BUFFER, ArchiveVersion.Dow3)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC, version: VersionLike): - super().test_unpack(data_stream, expected, version) - @pytest.mark.parametrize(["expected", "data_stream"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_inner_unpack(self, data_stream: bytes, expected: FileHeaderABC): - super().test_inner_unpack(data_stream, expected) + def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): + super().test_unpack(data_stream, 
expected) diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py index 04496ed..0e56ab6 100644 --- a/tests/relic/sga/test_vX_interface.py +++ b/tests/relic/sga/test_vX_interface.py @@ -20,6 +20,8 @@ def inner_permutate(subset: List, remaining: Tuple[List]) -> Iterable: yield item, sub_item if len(items) == 0: + return [] + elif len(items) == 1: return items[0] else: return inner_permutate(items[0], items[1:]) From 8204f88f2f45bff06a245fd23a42014ffd03e194 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Wed, 8 Jun 2022 17:33:22 -0800 Subject: [PATCH 03/19] SGA script fix --- src/relic/chunky/chunk/header.py | 3 +- src/relic/chunky/chunky/header.py | 3 +- src/relic/sga/abc_.py | 85 +++++++++---------------- src/relic/sga/io.py | 5 +- src/relic/sga/protocols.py | 9 ++- src/relic/sga/v9.py | 26 ++++++-- src/scripts/dump_sga.py | 5 +- src/scripts/universal/sga/common.py | 2 +- src/scripts/universal/sga/unpack.py | 4 +- tests/relic/sga/archive/test_archive.py | 9 +-- 10 files changed, 73 insertions(+), 78 deletions(-) diff --git a/src/relic/chunky/chunk/header.py b/src/relic/chunky/chunk/header.py index fe78790..03b97af 100644 --- a/src/relic/chunky/chunk/header.py +++ b/src/relic/chunky/chunk/header.py @@ -8,7 +8,8 @@ from serialization_tools.vstruct import VStruct from ..chunky.header import ChunkyVersion -from relic.sga.common.common import VersionLike, VersionError +from relic.sga.common import VersionLike +from ...common import VersionError class ChunkType(Enum): diff --git a/src/relic/chunky/chunky/header.py b/src/relic/chunky/chunky/header.py index fba28e0..7fb5b62 100644 --- a/src/relic/chunky/chunky/header.py +++ b/src/relic/chunky/chunky/header.py @@ -6,7 +6,8 @@ from serialization_tools.magic import MagicWordIO, MagicWord from serialization_tools.structx import Struct -from relic.sga.common.common import VersionEnum, Version, VersionLike, VersionError +from relic.common import VersionError +from relic.sga.common import VersionEnum, Version, VersionLike ChunkyVersionLayout = Struct("< 2L") diff --git a/src/relic/sga/abc_.py b/src/relic/sga/abc_.py index afba742..796895a 100644 --- a/src/relic/sga/abc_.py +++ b/src/relic/sga/abc_.py @@ -2,6 +2,7 @@ import zlib from abc import ABC +from collections import UserDict from dataclasses import dataclass from pathlib import PurePosixPath from typing import List, BinaryIO, Optional, Dict, ClassVar, Tuple, Type @@ -53,6 +54,32 @@ def build_tree(self): _.build_tree() +class NameBufferABC: + @classmethod + def unpack(cls, stream: BinaryIO, name_count: int) -> Dict[int,str]: + inst = {} + start = stream.tell() # use stream to avoid invalidating window + while len(inst) < name_count: + remaining = name_count - len(inst) + current = stream.tell() # Get relative pos to start + buffer = stream.read(_BUFFER_SIZE) + if len(buffer) == 0: + raise TypeError("Ran out of data!") + terminal_null = buffer.endswith(_NULL) + parts = buffer.split(_NULL, remaining) + + offset = 0 + for i, p in enumerate(parts): + if i == len(parts) - 1: + break + inst[current - start + offset] = p.decode("ascii") + offset += len(p) + 1 # +1 to include null terminal + + if not terminal_null: + stream.seek(current + offset) + return inst + + @dataclass class ArchiveTableOfContentsHeadersABC: drives: List[VirtualDriveHeaderABC] @@ -62,46 +89,11 @@ class ArchiveTableOfContentsHeadersABC: VDRIVE_HEADER_CLS: ClassVar[Type[VirtualDriveHeaderABC]] FOLDER_HEADER_CLS: ClassVar[Type[FolderHeaderABC]] FILE_HEADER_CLS: ClassVar[Type[FileHeaderABC]] + 
NAME_BUFFER_CLS: ClassVar[Type[NameBufferABC]] = NameBufferABC @classmethod def old_unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC, version: VersionLike = None) -> ArchiveTableOfContentsHeadersABC: - version = version or ptr.version # abusing the fact that the classes know their own version to avoid explicitly passing it in - - local_ptr = ptr.virtual_drive_ptr - with local_ptr.stream_jump_to(stream) as handle: - virtual_drives = [VirtualDriveHeaderABC.old_unpack(handle, version) for _ in range(local_ptr.count)] - - local_ptr = ptr.folder_ptr - with local_ptr.stream_jump_to(stream) as handle: - folders = [FolderHeaderABC.old_unpack(handle, version) for _ in range(local_ptr.count)] - - local_ptr = ptr.file_ptr - with local_ptr.stream_jump_to(stream) as handle: - files = [FileHeaderABC.old_unpack(handle, version) for _ in range(local_ptr.count)] - - # This gets a bit wierd - local_ptr = ptr.name_ptr - names: Dict[int, str] = {} - with local_ptr.stream_jump_to(stream) as handle: - start = stream.tell() # use stream to avoid invalidating window - while len(names) < local_ptr.count: - remaining = local_ptr.count - len(names) - current = stream.tell() # Get relative pos to start - buffer = handle.read(_BUFFER_SIZE) - terminal_null = buffer.endswith(_NULL) - parts = buffer.split(_NULL, remaining) - - offset = 0 - for i, p in enumerate(parts): - if i == len(parts) - 1: - break - names[current - start + offset] = p.decode("ascii") - offset += len(p) + 1 # +1 to include null terminal - - if not terminal_null: - stream.seek(current + offset) - - return ArchiveTableOfContentsHeadersABC(virtual_drives, folders, files, names) + raise TypeError('Use .unpack() and APIS to unpack!') @classmethod def unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC) -> ArchiveTableOfContentsHeadersABC: @@ -119,25 +111,8 @@ def unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC) -> ArchiveT # This gets a bit wierd local_ptr = ptr.name_ptr - names: Dict[int, str] = {} with local_ptr.stream_jump_to(stream) as handle: - start = stream.tell() # use stream to avoid invalidating window - while len(names) < local_ptr.count: - remaining = local_ptr.count - len(names) - current = stream.tell() # Get relative pos to start - buffer = handle.read(_BUFFER_SIZE) - terminal_null = buffer.endswith(_NULL) - parts = buffer.split(_NULL, remaining) - - offset = 0 - for i, p in enumerate(parts): - if i == len(parts) - 1: - break - names[current - start + offset] = p.decode("ascii") - offset += len(p) + 1 # +1 to include null terminal - - if not terminal_null: - stream.seek(current + offset) + names = cls.NAME_BUFFER_CLS.unpack(handle, local_ptr.count) return ArchiveTableOfContentsHeadersABC(virtual_drives, folders, files, names) diff --git a/src/relic/sga/io.py b/src/relic/sga/io.py index 7c4632d..6e73b28 100644 --- a/src/relic/sga/io.py +++ b/src/relic/sga/io.py @@ -4,9 +4,9 @@ from relic.common import VersionLike from relic.sga.vX import APIvX -from relic.sga.abc_ import ArchiveABC from relic.sga.common import ArchiveMagicWord, ArchiveVersion from relic.sga.protocols import ArchiveHeader, Archive +from relic import sga def unpack_archive_header(versions: Dict[VersionLike, Type[ArchiveHeader]], stream: BinaryIO, read_magic: bool = True) -> ArchiveHeader: @@ -37,7 +37,8 @@ def pack_archive(archive: Archive, stream: BinaryIO, write_magic: bool = True) - raise NotImplementedError -def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Dict[VersionLike, APIvX] = None, *, validate: bool = 
True) -> ArchiveABC: +def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Dict[VersionLike, APIvX] = None, *, validate: bool = True) -> Archive: + versions = sga.APIS if versions is None else versions ArchiveMagicWord.assert_magic_word(stream, True) version = ArchiveVersion.unpack_version(stream) api = versions[version] diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py index 8782c7a..755733b 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga/protocols.py @@ -1,6 +1,6 @@ from __future__ import annotations -from pathlib import PurePath +from pathlib import PurePath, PurePosixPath from typing import BinaryIO, Protocol, runtime_checkable, List, Optional, Iterable, Tuple from serialization_tools.ioutil import Ptr, WindowPtr @@ -85,6 +85,13 @@ class Folder(FolderCollection, FileCollection, FolderChild, DriveChild,ArchiveWa class File(FolderChild, DriveChild, Protocol): ... + @property + def full_path(self) -> PurePosixPath: + raise NotImplementedError + + def read_data(self, data_stream, param): + raise NotImplementedError + @runtime_checkable class Archive(DriveCollection,ArchiveWalkable, Protocol): diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py index cee9499..1d1d012 100644 --- a/src/relic/sga/v9.py +++ b/src/relic/sga/v9.py @@ -1,14 +1,14 @@ from __future__ import annotations from dataclasses import dataclass -from typing import BinaryIO +from typing import BinaryIO, Dict from serialization_tools.ioutil import Ptr, WindowPtr from serialization_tools.structx import Struct from relic.common import VersionLike from relic.sga import abc_ -from relic.sga.abc_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC +from relic.sga.abc_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC, NameBufferABC from relic.sga.common import ArchiveVersion from relic.sga.protocols import ArchiveHeader from relic.sga.vX import APIvX @@ -74,7 +74,7 @@ def compressed(self): @dataclass class ArchiveHeader(ArchiveHeaderABC, _V9): # name, TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK??? 
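(A quick size check on the layout change in the hunk below; a stdlib-only sketch, with field meanings taken from the comment above. Dropping one reserved uint32 shrinks the packed body from 420 to 416 bytes, which, with the 8-byte magic word and the 4-byte version prefix, matches the 432 -> 428 change the v9 test data makes later.)

import struct

# old v9 layout: name (128 bytes), toc_pos (u64), toc_size (u32),
# data_pos (u64), data_size (u32), three reserved u32s, 256-byte unknown blob
assert struct.calcsize("<128s Q L Q L 3L 256s") == 420
# the new layout drops one reserved u32
assert struct.calcsize("<128s QL QL 2L 256s") == 416
# prepend the 8-byte b"_ARCHIVE" magic word and the "<2H" version field:
assert 8 + struct.calcsize("<2H") + 420 == 432
assert 8 + struct.calcsize("<2H") + 416 == 428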
- LAYOUT = Struct(f"<128s Q L Q L 3L 256s") + LAYOUT = Struct(f"<128s QL QL 2L 256s") toc_ptr: WindowPtr data_ptr: WindowPtr @@ -97,12 +97,11 @@ def version(self) -> VersionLike: @classmethod def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, toc_pos, toc_size, data_pos, data_size, rsv_0_a, rsv_1, rsv_0_b, unk = cls.LAYOUT.unpack_stream(stream) + name, toc_pos, toc_size, data_pos, data_size, rsv_0_a, rsv_1, unk = cls.LAYOUT.unpack_stream(stream) assert rsv_1 == 1 assert rsv_0_a == 0 - assert rsv_0_b == 0 - + assert stream.tell() == toc_pos or stream.tell() == data_pos, (stream.tell(), toc_pos, data_pos) toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) data_ptr = WindowPtr(offset=data_pos, size=data_size) name = name.decode("utf-16-le").rstrip("\0") @@ -126,10 +125,25 @@ def __eq__(self, other): VirtualDrive = abc_.VirtualDriveABC +class NameBuffer(NameBufferABC): + @classmethod + def unpack(cls, stream: BinaryIO, buffer_size: int) -> Dict[int, str]: + """ Dow III uses a 'buffer size' instead of a 'name count' to unpack names """ + buffer = stream.read(buffer_size) + parts = buffer.split(b"\0") + lookup = {} + offset = 0 + for name in parts: + lookup[offset] = name.decode("ascii") + offset += len(name) + 1 # +1 to account for b'\0' + return lookup + + class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): VDRIVE_HEADER_CLS = VirtualDriveHeader FOLDER_HEADER_CLS = FolderHeader FILE_HEADER_CLS = FileHeader + NAME_BUFFER_CLS = NameBuffer @dataclass(init=False) diff --git a/src/scripts/dump_sga.py b/src/scripts/dump_sga.py index 80bebdc..c96c84c 100644 --- a/src/scripts/dump_sga.py +++ b/src/scripts/dump_sga.py @@ -5,6 +5,7 @@ from serialization_tools.walkutil import BlackList, WhiteList, filter_by_path, filter_by_file_extension, collapse_walk_on_files +import relic.sga.common import relic.sga.io from relic.config import DowIIIGame, DowIIGame, DowGame, filter_latest_dow_game, get_dow_root_directories @@ -30,7 +31,7 @@ def walk_archive_paths(folder: os.PathLike, extensions: WhiteList = None, whitel walk = os.walk(folder) walk = filter_by_path(walk, whitelist=whitelist, blacklist=blacklist, prune=True) walk = filter_by_file_extension(walk, whitelist=extensions) - walk = relic.sga.io.walk(walk) + walk = relic.sga.common.walk(walk) return collapse_walk_on_files(walk) @@ -45,7 +46,7 @@ def dump_archive(input_folder: os.PathLike, output_folder: os.PathLike, overwrit archive_name = splitext(basename(input_file_path))[0] with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream: print(f"\tDumping '{archive_name}'") - for _, _, _, files in relic.sga.io.walk(): + for _, _, _, files in relic.sga.common.walk(): for file in files: relative_file_path = file.full_path diff --git a/src/scripts/universal/sga/common.py b/src/scripts/universal/sga/common.py index 27d4ca6..9176f75 100644 --- a/src/scripts/universal/sga/common.py +++ b/src/scripts/universal/sga/common.py @@ -5,7 +5,7 @@ from serialization_tools.walkutil import blacklisted -from relic.sga import ArchiveMagicWord +from relic.sga.common import ArchiveMagicWord from scripts.universal.common import print_error, print_wrote, print_reading, PrintOptions, SharedExtractorParser SharedSgaParser = argparse.ArgumentParser(parents=[SharedExtractorParser], add_help=False) diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py index 00b7c67..5becaa1 100644 --- a/src/scripts/universal/sga/unpack.py +++ b/src/scripts/universal/sga/unpack.py @@ -3,8 +3,8 @@ from pathlib import Path from typing 
import Dict +import relic.sga.common import relic.sga.io -from relic.sga import Archive from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser from scripts.universal.sga.common import get_runner @@ -30,7 +30,7 @@ def unpack_archive(in_path: str, out_path: str, print_opts: PrintOptions = None, archive_name = splitext(basename(in_path))[0] with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream: print_any(f"Unpacking \"{archive_name}\"...", indent_level, print_opts) - for _, _, _, files in relic.sga.io.walk(): + for _, _, _, files in archive.walk(): for file in files: try: relative_file_path = file.full_path diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py index 24ae3a9..7b6f8b7 100644 --- a/tests/relic/sga/archive/test_archive.py +++ b/tests/relic/sga/archive/test_archive.py @@ -10,13 +10,14 @@ from tests.relic.sga.datagen import DowII, DowI, DowIII -def _ARCHIVE_WALK_SAMPLE(a:protocols.Archive) -> ArchiveWalk: +def _ARCHIVE_WALK_SAMPLE(a: protocols.Archive) -> ArchiveWalk: d = a.drives[0] sfs = d.sub_folders dfs = d.files yield d, None, sfs, dfs yield d, sfs[0], [], sfs[0].files + class ArchiveTests: def assert_equal(self, expected: ArchiveABC, result: ArchiveABC, sparse: bool): assert expected.header == result.header @@ -41,7 +42,6 @@ def test_unpack(self, stream_data: bytes, expected: ArchiveABC): assert expected.__class__ == archive.__class__ self.assert_equal(expected, archive, sparse) - @abstractmethod def test_pack(self, archive: ArchiveABC, expected: bytes): for write_magic in TF: @@ -61,8 +61,6 @@ def fast_gen_dow1_archive(*args): DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") - - class TestArchiveV2(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) @@ -87,7 +85,6 @@ def fast_gen_dow2_archive(*args): DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") - class TestArchiveV5(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) @@ -112,8 +109,6 @@ def fast_gen_dow3_archive(*args): DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED = fast_gen_dow3_archive("Dow3 Test Archive", "Tests", "Some Witty FileName.txt", b"NGL; I'm running out of dumb/clever test data.") - - class TestArchiveV9(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) From 934fdf721d8f83e336464f110a7f0ec5acb609a4 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Wed, 8 Jun 2022 20:21:22 -0800 Subject: [PATCH 04/19] Fix v9 tests --- src/relic/sga/protocols.py | 9 +++++---- src/relic/sga/v9.py | 3 +-- tests/relic/sga/datagen.py | 10 ++++------ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py index 755733b..712df52 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga/protocols.py @@ -30,7 +30,7 @@ def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bo @classmethod @property - def version(cls) -> VersionLike: + def version(self) -> VersionLike: raise NotImplementedError @classmethod @@ -46,6 +46,7 @@ class ArchiveWalkable(Protocol): def walk(self) -> ArchiveWalk: raise NotImplementedError + @runtime_checkable class DriveCollection(Protocol): drives: 
List[VirtualDrive] @@ -72,12 +73,12 @@ class DriveChild(Protocol): @runtime_checkable -class VirtualDrive(FolderCollection, FileCollection,ArchiveWalkable, Protocol): +class VirtualDrive(FolderCollection, FileCollection, ArchiveWalkable, Protocol): ... @runtime_checkable -class Folder(FolderCollection, FileCollection, FolderChild, DriveChild,ArchiveWalkable, Protocol): +class Folder(FolderCollection, FileCollection, FolderChild, DriveChild, ArchiveWalkable, Protocol): ... @@ -94,7 +95,7 @@ def read_data(self, data_stream, param): @runtime_checkable -class Archive(DriveCollection,ArchiveWalkable, Protocol): +class Archive(DriveCollection, ArchiveWalkable, Protocol): header: ArchiveHeader """Sparse represents whether data was loaded on creation.""" _sparse: bool diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py index 1d1d012..9fd381d 100644 --- a/src/relic/sga/v9.py +++ b/src/relic/sga/v9.py @@ -101,7 +101,6 @@ def unpack(cls, stream: BinaryIO) -> ArchiveHeader: assert rsv_1 == 1 assert rsv_0_a == 0 - assert stream.tell() == toc_pos or stream.tell() == data_pos, (stream.tell(), toc_pos, data_pos) toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) data_ptr = WindowPtr(offset=data_pos, size=data_size) name = name.decode("utf-16-le").rstrip("\0") @@ -109,7 +108,7 @@ def unpack(cls, stream: BinaryIO) -> ArchiveHeader: return cls(name, toc_ptr, data_ptr, unk) def pack(self, stream: BinaryIO) -> int: - args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 0, 1, 0, self.unk + args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 0, 1, self.unk return self.LAYOUT.pack_stream(stream, *args) def __eq__(self, other): diff --git a/tests/relic/sga/datagen.py b/tests/relic/sga/datagen.py index 6fbab13..57848c6 100644 --- a/tests/relic/sga/datagen.py +++ b/tests/relic/sga/datagen.py @@ -304,7 +304,7 @@ def gen_csum(buffer: bytes, eigen: bytes) -> bytes: class DowIII: VDRIVE_UNK = bytes.fromhex("dead") # Arbitrary value - ARCHIVE_HEADER_SIZE = 432 + ARCHIVE_HEADER_SIZE = 428 ARCHIVE_HEADER_UNK = b"dead " * 51 + b"\0" # 256 bytes spamming `dead ` in ascii; with one byte '\0' to pad to 256 @classmethod @@ -319,7 +319,7 @@ def gen_archive_header_buffer(cls, name: str, toc_offset: int, toc_size: int, da encoded_toc_size = uint(toc_size) encoded_data_offset = ulong(data_offset) encoded_data_size = uint(data_size) - return magic + version + encoded_name + encoded_toc_offset + encoded_toc_size + encoded_data_offset + encoded_data_size + uint(0) + uint(1) + uint(0) + cls.ARCHIVE_HEADER_UNK + return magic + version + encoded_name + encoded_toc_offset + encoded_toc_size + encoded_data_offset + encoded_data_size + uint(0) + uint(1) + cls.ARCHIVE_HEADER_UNK @staticmethod def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v9.VirtualDriveHeader: @@ -385,8 +385,7 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) - # toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! 
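(The replacement line below switches this pointer back to the name buffer's byte size; a worked sketch of what v9's size-keyed NameBuffer.unpack produces, trailing empty entry included — the names here are placeholders:)

import io

buf = b"Tests\0a.txt\0"  # two NUL-terminated names
stream = io.BytesIO(buf)
lookup, offset = {}, 0
for name in stream.read(len(buf)).split(b"\0"):
    lookup[offset] = name.decode("ascii")
    offset += len(name) + 1  # +1 for the NUL terminator
# byte offsets key the table, so the ToC pointer must carry len(buf), not a count
assert lookup == {0: "Tests", 6: "a.txt", 12: ""}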
- toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) # According to my notes; V9 uses this to store the size of the name buffer; but Archive Unpacking assumes it's a count; must test on real V9 files. TODO + toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! toc_ptr_buf = cls.gen_toc_ptr_buffer(*toc_ptrs) return cls.gen_archive_buffer(archive_name, toc_ptr_buf, toc_buf, file_uncomp_data, magic) @@ -406,8 +405,7 @@ def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_unco folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) - # toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! - toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) # According to my notes; V9 uses this to store the size of the name buffer; but Archive Unpacking assumes it's a count; must test on real V9 files. TODO + toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! toc_ptr_buf = cls.gen_toc_ptr_buffer(*toc_ptrs) full_toc = toc_ptr_buf + toc_buf From 91f93a342e08976745bffb6df594381168d3bd60 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Thu, 9 Jun 2022 00:02:50 -0800 Subject: [PATCH 05/19] v7 support? --- src/relic/sga/__init__.py | 3 +- src/relic/sga/common.py | 2 + src/relic/sga/v7.py | 166 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 src/relic/sga/v7.py diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py index 2d5926d..95f6b8b 100644 --- a/src/relic/sga/__init__.py +++ b/src/relic/sga/__init__.py @@ -15,7 +15,8 @@ # # __all__.extend(vdrive.__all__) from relic.sga.v2 import APIv2 from relic.sga.v5 import APIv5 +from relic.sga.v7 import APIv7 from relic.sga.v9 import APIv9 -__APIS = [APIv2,APIv5,APIv9] +__APIS = [APIv2,APIv5,APIv7,APIv9] APIS = {api.version:api for api in __APIS} \ No newline at end of file diff --git a/src/relic/sga/common.py b/src/relic/sga/common.py index 1b6b2d6..cfbc40c 100644 --- a/src/relic/sga/common.py +++ b/src/relic/sga/common.py @@ -18,6 +18,8 @@ class ArchiveVersion(VersionEnum): Dow = v2 v5 = Version(5) Dow2 = v5 + v7 = Version(7) + CoH2 = v7 v9 = Version(9) Dow3 = v9 diff --git a/src/relic/sga/v7.py b/src/relic/sga/v7.py new file mode 100644 index 0000000..8107f30 --- /dev/null +++ b/src/relic/sga/v7.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import BinaryIO, Tuple, ClassVar, Type, List, Dict + +from serialization_tools.ioutil import Ptr, WindowPtr +from serialization_tools.structx import Struct + +from relic.common import VersionLike +from relic.sga import v2 +from relic.sga.abc_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC, ArchiveTableOfContentsABC, VirtualDriveABC, NameBufferABC +from relic.sga.checksums import validate_md5_checksum +from relic.sga.common import ArchiveVersion +from relic.sga.protocols import Archive, ArchiveWalk +from relic.sga.v2 import ArchiveToCPtrABC +from relic.sga import abc_ +from relic.sga.vX import APIvX + +version = ArchiveVersion.v7 + + +class _V7: + """Mixin to allow classes to add `version` from the module level to the 
class level""" + version = version # classvar = modulevar # THIS IS A COPY; NOT A REFERENCE! + + +@dataclass +class VirtualDriveHeader(VirtualDriveHeaderABC, _V7): + LAYOUT = Struct("< 64s 64s 5L") + + +@dataclass +class ArchiveToCPtr(ArchiveToCPtrABC, _V7): + LAYOUT = Struct("< 8I") + + +@dataclass +class FolderHeader(FolderHeaderABC, _V7): + LAYOUT = Struct("< L 4I") + + +@dataclass +class FileHeader(FileHeaderABC, _V7): + LAYOUT = Struct(f"<5L H 2L") + unk_a: int + unk_b: int + unk_c: int + unk_d: int + + @property + def compressed(self): + return self.compressed_size < self.decompressed_size + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeader: + name_off, data_off, comp_size, decomp_size, unk_a, unk_b, unk_c, unk_d = cls.LAYOUT.unpack_stream(stream) + # Name, File, Compressed, Decompressed, ???, ??? + name_ptr = Ptr(name_off) + data_ptr = Ptr(data_off) + return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d) + + def pack(self, stream: BinaryIO) -> int: + return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b, self.unk_c, self.unk_d) + + def __eq__(self, other): + return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) + + +@dataclass +class ArchiveHeader(ArchiveHeaderABC, _V7): + LAYOUT = Struct("< 128s 3L") + LAYOUT_2 = Struct("< 2L") + TOC_HEADER_SIZE = ArchiveToCPtr.LAYOUT.size + toc_ptr: WindowPtr + unk_a: int + unk_b: int + + # This may not mirror DowI one-to-one, until it's verified, it stays here + # noinspection DuplicatedCode + def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): + return True + + def __eq__(self, other): + # TODO make issue to add equality to WindowPtr/Ptr + return self.name == other.name and (self.unk_a, self.unk_b) == (other.unk_a, other.unk_b) \ + and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ + and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset + + @classmethod + def unpack(cls, stream: BinaryIO) -> ArchiveHeader: + name, unk_a, data_offset, rsv_1 = cls.LAYOUT.unpack_stream(stream) + toc_pos = stream.tell() + stream.seek(cls.TOC_HEADER_SIZE, 1) + toc_size, unk_b = cls.LAYOUT_2.unpack_stream(stream) + + # assert toc_size == toc_size_2, (toc_size, toc_size_2) + assert rsv_1 == 1 + name = name.decode("utf-16-le").rstrip("\0") + toc_ptr, data_ptr = WindowPtr(toc_pos, toc_size), WindowPtr(data_offset) + return cls(name, toc_ptr, data_ptr, unk_a,unk_b) + + def pack(self, stream: BinaryIO) -> int: + name, toc_size, data_offset = self.name.encode("utf-16-le"), self.toc_ptr.size, self.data_ptr.offset + written = self.TOC_HEADER_SIZE # count + written += self.LAYOUT.pack_stream(stream, name, self.unk_a, data_offset, 1) + stream.seek(self.TOC_HEADER_SIZE, 1) # this will write \0 when seeking past files (unless python api/system api changes) + written += self.LAYOUT.pack_stream(stream, toc_size, self.unk_b) + return written + + +class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): + VDRIVE_HEADER_CLS = VirtualDriveHeader + FOLDER_HEADER_CLS = FolderHeader + FILE_HEADER_CLS = FileHeader + # NAME_BUFFER_CLS = NameBuffer + + +@dataclass(init=False) +class Archive(Archive, _V7): + header: ArchiveHeader + _sparse: bool + + def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse: bool): + self.header = header + self._sparse 
= _sparse + self.drives = drives + + # redefine function + walk = ArchiveABC.walk + + @classmethod + def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): + with header.toc_ptr.stream_jump_to(stream) as handle: + toc_ptr = ArchiveToCPtr.unpack(handle) + with header.toc_ptr.stream_jump_to(stream) as handle: + toc_headers = ArchiveTableOfContentsHeaders.unpack(handle, toc_ptr) + toc = ArchiveTableOfContentsABC.create(toc_headers) + + toc.load_toc() + toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up + if not sparse: + with header.data_ptr.stream_jump_to(stream) as handle: + toc.load_data(handle) + + return cls(header, toc.drives, sparse) + + def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: + raise NotImplementedError + + +File = abc_.FileABC +Folder = abc_.FolderABC +VirtualDrive = abc_.VirtualDriveABC + + +class APIv7(APIvX, _V7): + ArchiveHeader = ArchiveHeader + ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders + FileHeader = FileHeader + FolderHeader = FolderHeader + VirtualDriveHeader = VirtualDriveHeader + Archive = Archive + ArchiveToCPtr = ArchiveToCPtr + File = File + Folder = Folder + VirtualDrive = VirtualDrive From 6f440f14d4aba5e24ccfcb7c143fdeb51aeb5f84 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Fri, 10 Jun 2022 13:19:54 -0800 Subject: [PATCH 06/19] Rewriting now that I have a better understanding of the binary layout Before I was rewriting to keep versioned code together; which I'll still do; but now that I see that the SGA follows a pretty straightforward layout (for v2-v9) I think I can refactor a lot of excess code out --- src/relic/sga/common.py | 18 ++++++++++++++++-- src/relic/sga/v7.py | 39 ++++++++++++++++++++++++++++++------- src/relic/sga/v9.py | 8 ++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/relic/sga/common.py b/src/relic/sga/common.py index cfbc40c..b3f7743 100644 --- a/src/relic/sga/common.py +++ b/src/relic/sga/common.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass +from enum import Enum from typing import Optional, Iterator, BinaryIO, Union from serialization_tools.magic import MagicWordIO @@ -12,6 +13,20 @@ ArchiveVersionLayout = Struct("< 2H") +class FileVerificationType(Enum): + None_ = 0 # unknown real values, assuming incremental + CRC = 1 # unknown real values, assuming incremental + CRCBlocks = 2 # unknown real values, assuming incremental + MD5Blocks = 3 # unknown real values, assuming incremental + SHA1Blocks = 4 # unknown real values, assuming incremental + + +class FileStorageType(Enum): + Store = 0 + StreamCompress = 1 # 16 + BufferCompress = 2 # 32 + + class ArchiveVersion(VersionEnum): Unsupported = None v2 = Version(2) @@ -63,7 +78,6 @@ def __next__(self) -> int: ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) - def walk(collection: Union[DriveCollection, FolderCollection, FileCollection]) -> ArchiveWalk: raise TypeError("Use walk() function on collection!") # drives = collection.drives if isinstance(collection, DriveCollection) else [] @@ -93,4 +107,4 @@ def walk(collection: Union[DriveCollection, FolderCollection, FileCollection]) - # for d, f, folds, files in walk(folder): # d = d or root_drive # f = f or folder or root_folder - # yield d, f, folds, files \ No newline at end of file + # yield d, f, folds, files diff --git a/src/relic/sga/v7.py b/src/relic/sga/v7.py index 8107f30..6bdc184 100644 --- a/src/relic/sga/v7.py +++ b/src/relic/sga/v7.py @@
-18,6 +18,31 @@ version = ArchiveVersion.v7 +""" +Format According to ArchiveViewer (CoH2 Mod tools) +Magic: b'Archive_' +Version: UInt16 +Product: UInt16 (I call this minor) +NiceName: bytes[128]/str[64] (utf-16-le) +Header Size: UInt32 +Data Offset : UInt32 +(cached position in file here) +ToC Rel Pos: UInt32 +ToC Count : index_size +Folder Rel Pos: UInt32 +Folder Count : index_size +File Rel Pos: UInt32 +File Count : index_size +Name Buffer Pos : UInt32 +Name Buffer Count/Size ??? : index_size +unk??? : uint32 +Block Size : Uint32 +~~~ +ToC + + +""" + class _V7: """Mixin to allow classes to add `version` from the module level to the class level""" @@ -41,7 +66,7 @@ class FolderHeader(FolderHeaderABC, _V7): @dataclass class FileHeader(FileHeaderABC, _V7): - LAYOUT = Struct(f"<5L H 2L") + LAYOUT = Struct(f"<5L BB 2L") unk_a: int unk_b: int unk_c: int @@ -53,11 +78,11 @@ def compressed(self): @classmethod def unpack(cls, stream: BinaryIO) -> FileHeader: - name_off, data_off, comp_size, decomp_size, unk_a, unk_b, unk_c, unk_d = cls.LAYOUT.unpack_stream(stream) + name_off, data_off, comp_size, decomp_size, unk_a, unk_b1, unk_b2, unk_c, unk_d = cls.LAYOUT.unpack_stream(stream) # Name, File, Compressed, Decompressed, ???, ??? name_ptr = Ptr(name_off) data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d) + return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b1, unk_b2, unk_c, unk_d) def pack(self, stream: BinaryIO) -> int: return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b, self.unk_c, self.unk_d) @@ -73,7 +98,7 @@ class ArchiveHeader(ArchiveHeaderABC, _V7): TOC_HEADER_SIZE = ArchiveToCPtr.LAYOUT.size toc_ptr: WindowPtr unk_a: int - unk_b: int + block_size: int # IDK what this means # This may not mirror DowI one-to-one, until it's verified, it stays here # noinspection DuplicatedCode @@ -82,7 +107,7 @@ def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bo def __eq__(self, other): # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and (self.unk_a, self.unk_b) == (other.unk_a, other.unk_b) \ + return self.name == other.name and (self.unk_a, self.block_size) == (other.unk_a, other.block_size) \ and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset @@ -91,13 +116,13 @@ def unpack(cls, stream: BinaryIO) -> ArchiveHeader: name, unk_a, data_offset, rsv_1 = cls.LAYOUT.unpack_stream(stream) toc_pos = stream.tell() stream.seek(cls.TOC_HEADER_SIZE, 1) - toc_size, unk_b = cls.LAYOUT_2.unpack_stream(stream) + toc_size, block_size = cls.LAYOUT_2.unpack_stream(stream) # assert toc_size == toc_size_2, (toc_size, toc_size_2) assert rsv_1 == 1 name = name.decode("utf-16-le").rstrip("\0") toc_ptr, data_ptr = WindowPtr(toc_pos, toc_size), WindowPtr(data_offset) - return cls(name, toc_ptr, data_ptr, unk_a,unk_b) + return cls(name, toc_ptr, data_ptr, unk_a,block_size) def pack(self, stream: BinaryIO) -> int: name, toc_size, data_offset = self.name.encode("utf-16-le"), self.toc_ptr.size, self.data_ptr.offset diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py index 9fd381d..45db809 100644 --- a/src/relic/sga/v9.py +++ b/src/relic/sga/v9.py @@ -74,7 +74,8 @@ def compressed(self): @dataclass class ArchiveHeader(ArchiveHeaderABC, _V9): # name, 
TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK??? - LAYOUT = Struct(f"<128s QL QL 2L 256s") + + LAYOUT = Struct(f"<128s QL QQ L 256s") toc_ptr: WindowPtr data_ptr: WindowPtr @@ -97,10 +98,9 @@ def version(self) -> VersionLike: @classmethod def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, toc_pos, toc_size, data_pos, data_size, rsv_0_a, rsv_1, unk = cls.LAYOUT.unpack_stream(stream) + name, toc_pos, toc_size, data_pos, data_size, rsv_1, unk = cls.LAYOUT.unpack_stream(stream) assert rsv_1 == 1 - assert rsv_0_a == 0 toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) data_ptr = WindowPtr(offset=data_pos, size=data_size) name = name.decode("utf-16-le").rstrip("\0") @@ -108,7 +108,7 @@ def unpack(cls, stream: BinaryIO) -> ArchiveHeader: return cls(name, toc_ptr, data_ptr, unk) def pack(self, stream: BinaryIO) -> int: - args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 0, 1, self.unk + args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 1, self.unk return self.LAYOUT.pack_stream(stream, *args) def __eq__(self, other): From 5b545a68c0d6c76fdc1e9d2a43821abaa181bef0 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sat, 11 Jun 2022 01:44:45 -0800 Subject: [PATCH 07/19] New API for SGA (v2/v5/v7/v9) It still really sucks; but to avoid the problem that the OFFICIAL RELIC ARCHIVE TOOL has; where each `version` reimplements its own codebase; I get around this as much as I can with ABCs, but it is ugly. Ranting about the official Archive Viewer; admittedly, they are distributed with CoH2 (and DowIII) so as long as it works for those formats; great! But then why bother supporting past formats? Dow II (v5) is among the formats which are supported (v4 is the earliest supported according to their version-assertion logic; COH1 I assume), but the checks to support that format don't seem to line up with my own research. I'd have assumed this was because of the switch to the Essence Engine (over whatever DOW1 uses), but COH2 and DOW2 both use the new Essence engine; so... ? As an aside; that explains why DOW1's enumerations for FileStorageType are different. I pray this is the last time I rewrite this API; I moved away from the clunky 'make no assumptions' strategy I had done before (which was very straightforward, but a lot of boilerplate) to a simple read/write at the archive level, with unpack/pack helpers used for serializing data. I need to get better at typing (figuring out generics of some kind) to make ArchiveABC auto-type things like Folder/File/Drive definitions and their respective pack-aware helpers.
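One possible shape for those generics; a sketch only (the `TDef` bound and `unpack_many` are illustrative names, not anything in this codebase):

from typing import BinaryIO, List, Type, TypeVar

TDef = TypeVar("TDef")  # any *Def/*Header class exposing a classmethod `unpack`

def unpack_many(def_cls: Type[TDef], stream: BinaryIO, count: int) -> List[TDef]:
    # definitions are fixed-size, so they can be read back-to-back;
    # the return type follows whichever class the caller passes in
    return [def_cls.unpack(stream) for _ in range(count)]  # type: ignore[attr-defined]

# e.g. file_defs = unpack_many(cls.FILE_DEF, stream, toc_ptrs.file_count)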
We also stop using those nifty ptr objects; as much as I liked the idea of 'slicing' a binary stream; it's annoying to debug, and only hides where a stream is pointing: perhaps using a slice which only warns/errors when reading/writing outside the bounds (allowing seeks); and will only read to the 'end of the slice' --- src/relic/sga/abc_.py | 545 ------------------ src/relic/sga/core.py | 389 +++++++++++++ src/relic/sga/v2.py | 223 ++++--- src/relic/sga/v5.py | 235 ++++---- src/relic/sga/v7.py | 248 +++----- src/relic/sga/v9.py | 208 +++---- src/relic/sga/vX.py | 38 +- src/relic/sga_old/Archive Reader Notes.txt | 88 +++ src/relic/{sga => sga_old}/__init__.py | 8 +- src/relic/sga_old/archive.py | 102 ++++ src/relic/{sga => sga_old}/checksums.py | 0 src/relic/{sga => sga_old}/common.py | 73 +-- src/relic/{sga => sga_old}/io.py | 10 +- src/relic/{sga => sga_old}/protocols.py | 8 +- src/relic/sga_old/v2.py | 151 +++++ src/relic/sga_old/v5.py | 157 +++++ src/relic/sga_old/v9.py | 167 ++++++ src/relic/sga_old/vX.py | 39 ++ src/relic/{sga => sga_old}/writer.py | 0 src/scripts/dump_sga.py | 6 +- src/scripts/universal/sga/common.py | 2 +- src/scripts/universal/sga/unpack.py | 4 +- tests/relic/sga/archive/test_archive.py | 6 +- .../relic/sga/archive/test_archive_header.py | 12 +- tests/relic/sga/datagen.py | 8 +- tests/relic/sga/file/test_file_header.py | 4 +- tests/relic/sga/test_vX_interface.py | 6 +- 27 files changed, 1544 insertions(+), 1193 deletions(-) delete mode 100644 src/relic/sga/abc_.py create mode 100644 src/relic/sga/core.py create mode 100644 src/relic/sga_old/Archive Reader Notes.txt rename src/relic/{sga => sga_old}/__init__.py (75%) create mode 100644 src/relic/sga_old/archive.py rename src/relic/{sga => sga_old}/checksums.py (100%) rename src/relic/{sga => sga_old}/common.py (64%) rename src/relic/{sga => sga_old}/io.py (85%) rename src/relic/{sga => sga_old}/protocols.py (96%) create mode 100644 src/relic/sga_old/v2.py create mode 100644 src/relic/sga_old/v5.py create mode 100644 src/relic/sga_old/v9.py create mode 100644 src/relic/sga_old/vX.py rename src/relic/{sga => sga_old}/writer.py (100%) diff --git a/src/relic/sga/abc_.py b/src/relic/sga/abc_.py deleted file mode 100644 index 796895a..0000000 --- a/src/relic/sga/abc_.py +++ /dev/null @@ -1,545 +0,0 @@ -from __future__ import annotations - -import zlib -from abc import ABC -from collections import UserDict -from dataclasses import dataclass -from pathlib import PurePosixPath -from typing import List, BinaryIO, Optional, Dict, ClassVar, Tuple, Type - -from serialization_tools.ioutil import Ptr, WindowPtr -from serialization_tools.size import KiB -from serialization_tools.structx import Struct - -# import relic.sga.io -from relic.common import VersionLike -from relic.sga.common import ArchiveRange, ArchiveVersion, walk -# from relic.sga.io import walk -from relic.sga.protocols import ArchiveHeader, Archive, FileCollection, FolderCollection, Folder, File, VirtualDrive, ArchiveWalk, DriveCollection, DriveChild, FolderChild, ArchiveWalkable - -_NULL = b"\0" -_BUFFER_SIZE = 64 * KiB - - -@dataclass -class ArchiveTableOfContentsABC: - drives: List[VirtualDriveABC] - folders: List[FolderABC] - files: List[FileABC] - names: Dict[int, str] - - @classmethod - def create(cls, toc_headers: ArchiveTableOfContentsHeadersABC) -> ArchiveTableOfContentsABC: - - drives = [VirtualDriveABC.create(header) for header in toc_headers.drives] - folders = [FolderABC.create(header) for header in toc_headers.folders] - files = [FileABC.create(header) 
for header in toc_headers.files] - - return ArchiveTableOfContentsABC(drives, folders, files, toc_headers.names) - - def load_data(self, stream: BinaryIO): - for _ in self.files: - _.load_data(stream) - - def load_toc(self): - for _ in self.drives: - _.load_toc(self) - for _ in self.folders: - _.load_toc(self) - for _ in self.files: - _.load_toc(self) - - def build_tree(self): - for _ in self.drives: - _.build_tree() - - -class NameBufferABC: - @classmethod - def unpack(cls, stream: BinaryIO, name_count: int) -> Dict[int,str]: - inst = {} - start = stream.tell() # use stream to avoid invalidating window - while len(inst) < name_count: - remaining = name_count - len(inst) - current = stream.tell() # Get relative pos to start - buffer = stream.read(_BUFFER_SIZE) - if len(buffer) == 0: - raise TypeError("Ran out of data!") - terminal_null = buffer.endswith(_NULL) - parts = buffer.split(_NULL, remaining) - - offset = 0 - for i, p in enumerate(parts): - if i == len(parts) - 1: - break - inst[current - start + offset] = p.decode("ascii") - offset += len(p) + 1 # +1 to include null terminal - - if not terminal_null: - stream.seek(current + offset) - return inst - - -@dataclass -class ArchiveTableOfContentsHeadersABC: - drives: List[VirtualDriveHeaderABC] - folders: List[FolderHeaderABC] - files: List[FileHeaderABC] - names: Dict[int, str] - VDRIVE_HEADER_CLS: ClassVar[Type[VirtualDriveHeaderABC]] - FOLDER_HEADER_CLS: ClassVar[Type[FolderHeaderABC]] - FILE_HEADER_CLS: ClassVar[Type[FileHeaderABC]] - NAME_BUFFER_CLS: ClassVar[Type[NameBufferABC]] = NameBufferABC - - @classmethod - def old_unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC, version: VersionLike = None) -> ArchiveTableOfContentsHeadersABC: - raise TypeError('Use .unpack() and APIS to unpack!') - - @classmethod - def unpack(cls, stream: BinaryIO, ptr: ArchiveTableOfContentsPtrABC) -> ArchiveTableOfContentsHeadersABC: - local_ptr = ptr.virtual_drive_ptr - with local_ptr.stream_jump_to(stream) as handle: - virtual_drives = [cls.VDRIVE_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] - - local_ptr = ptr.folder_ptr - with local_ptr.stream_jump_to(stream) as handle: - folders = [cls.FOLDER_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] - - local_ptr = ptr.file_ptr - with local_ptr.stream_jump_to(stream) as handle: - files = [cls.FILE_HEADER_CLS.unpack(handle) for _ in range(local_ptr.count)] - - # This gets a bit wierd - local_ptr = ptr.name_ptr - with local_ptr.stream_jump_to(stream) as handle: - names = cls.NAME_BUFFER_CLS.unpack(handle, local_ptr.count) - - return ArchiveTableOfContentsHeadersABC(virtual_drives, folders, files, names) - - -@dataclass -class TocItemPtrABC(Ptr): - def __init__(self, offset: int, count: int, whence: int = 0): - super().__init__(offset, whence) - self.count = count - - -@dataclass -class ArchiveTableOfContentsPtrABC: - # Virtual Drives (offset, count), Folder (offset, count), File (offset, count), Names (offset, count) - LAYOUT: ClassVar[Struct] - virtual_drive_ptr: TocItemPtrABC - folder_ptr: TocItemPtrABC - file_ptr: TocItemPtrABC - name_ptr: TocItemPtrABC - - @property - def version(self) -> ArchiveVersion: - raise NotImplementedError - - @classmethod - def _unpack_tuple(cls, stream: BinaryIO) -> Tuple[TocItemPtrABC, TocItemPtrABC, TocItemPtrABC, TocItemPtrABC]: - vd_offset, vd_count, fold_offset, fold_count, file_offset, file_count, name_offset, name_count = cls.LAYOUT.unpack_stream(stream) - vd_ptr = TocItemPtrABC(vd_offset, vd_count) - fold_ptr = 
TocItemPtrABC(fold_offset, fold_count) - file_ptr = TocItemPtrABC(file_offset, file_count) - name_ptr = TocItemPtrABC(name_offset, name_count) - return vd_ptr, fold_ptr, file_ptr, name_ptr - - def _pack_tuple(self) -> Tuple[int, int, int, int, int, int, int, int]: - return self.virtual_drive_ptr.offset, self.virtual_drive_ptr.count, \ - self.folder_ptr.offset, self.folder_ptr.count, \ - self.file_ptr.offset, self.file_ptr.count, \ - self.name_ptr.offset, self.name_ptr.count - - @classmethod - def unpack_version(cls, stream: BinaryIO, version: VersionLike) -> 'ArchiveTableOfContentsPtrABC': - raise TypeError("Use APIs[version].ArchiveTableOfContentsPtr.unpack(stream)") - # toc_ptr_class = _ToCPtr_VERSION_MAP.get(version) - # - # if not toc_ptr_class: - # raise NotImplementedError(version) - # - # return relic.sga.io.unpack_archive(stream) - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'ArchiveTableOfContentsPtrABC': - args = cls._unpack_tuple(stream) - return cls(*args) - - def pack(self, stream: BinaryIO) -> int: - args = self._pack_tuple() - return self.LAYOUT.pack_stream(stream, *args) - - def __str__(self): - parts = [f"{k}={v}" for k, v in self.__dict__.items()] - return f"{self.__class__.__name__}({', '.join(parts)})" - - def __repr__(self): - return str(self) - - -@dataclass -class ArchiveHeaderABC(ArchiveHeader, ABC): - name: str - toc_ptr: Ptr - data_ptr: WindowPtr - - -@dataclass -class ArchiveABC(Archive): - header: ArchiveHeader - """Sparse represents whether data was loaded on creation.""" - - def walk(self) -> ArchiveWalk: - for drive in self.drives: - for inner_walk in drive.walk(): - yield inner_walk - - _sparse: bool - - def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse: bool): - self.header = header - self._sparse = _sparse - self.drives = drives - - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveTableOfContentsPtrABC - TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeadersABC - TOC_CLS: ClassVar[Type[ArchiveTableOfContentsABC]] = ArchiveTableOfContentsABC - - @classmethod - def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): - # version = header.version - with header.toc_ptr.stream_jump_to(stream) as handle: - toc_ptr = cls.TOC_PTR_CLS.unpack(handle) - toc_headers = cls.TOC_HEADERS_CLS.unpack(handle, toc_ptr) - toc = cls.TOC_CLS.create(toc_headers) - - toc.load_toc() - toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up - if not sparse: - with header.data_ptr.stream_jump_to(stream) as handle: - toc.load_data(handle) - - return cls(header, toc.drives, sparse) - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -@dataclass -class FileABC(File): - header: FileHeaderABC - name: str - data: Optional[bytes] = None - _decompressed: bool = False - parent_folder: Optional[FolderABC] = None - parent_drive: Optional[VirtualDriveABC] = None - - @property - def data_loaded(self) -> bool: - return self.data is not None - - @property - def expects_decompress(self) -> bool: - return self.header.compressed - - @property - def decompressed(self) -> bool: - if self.data_loaded: - return self._decompressed or not self.expects_decompress - else: - return False - - @property - def full_path(self) -> PurePosixPath: - if self.parent_folder: - return self.parent_folder.full_path / self.name - elif self.parent_drive: - return self.parent_drive.full_path / self.name - else: - return 
PurePosixPath(self.name) - - @classmethod - def create(cls, header: FileHeaderABC) -> FileABC: - _decompressed = False - # noinspection PyTypeChecker - return FileABC(header, None, None, _decompressed) - - def load_name_from_lookup(self, name_lookup: Dict[int, str]): - self.name = name_lookup[self.header.name_sub_ptr.offset] - - def load_toc(self, toc: ArchiveTableOfContentsABC): - self.load_name_from_lookup(toc.names) - - def read_data(self, stream: BinaryIO, decompress: bool = False) -> bytes: - with self.header.data_sub_ptr.stream_jump_to(stream) as handle: - buffer = handle.read(self.header.compressed_size) - if decompress and self.expects_decompress: - return zlib.decompress(buffer) - else: - return buffer - - def load_data(self, stream: BinaryIO, decompress: bool = False): - self.data = self.read_data(stream, decompress) - self._decompressed = decompress - - def get_decompressed_data(self) -> bytes: - if self.decompressed: - return self.data - else: - # zlib_header = Struct("2B").unpack(self.data[:2]) - # full_zlib_header = (zlib_header[0] & 0xF0) >> 4, zlib_header[0] & 0xF, \ - # (zlib_header[1] & 0b11000000) >> 6, (zlib_header[1] >> 5) & 0b1, zlib_header[1] & 0b11111 - # convert = {7: 32, 6: 16} - # assert convert[full_zlib_header[0]] == self.header.compression_flag.value - return zlib.decompress(self.data) - - def decompress(self): - self.data = self.get_decompressed_data() - self._decompressed = True - - -@dataclass -class FileHeaderABC: - LAYOUT: ClassVar[Struct] - name_sub_ptr: Ptr # Sub ptr is expected to be used via window (E.G. 'WindowPtr() as handle', then, 'data_sub_ptr.stream_jump_to(handle)') - data_sub_ptr: Ptr - decompressed_size: int - compressed_size: int - - def __eq__(self, other): - # TODO ptr equality - return self.decompressed_size == other.decompressed_size and self.compressed_size == other.compressed_size - - @property - def compressed(self): - raise NotImplementedError - - @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeaderABC: - raise NotImplementedError - - def pack(self, stream: BinaryIO) -> int: - raise NotImplementedError - - @classmethod - def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> FileHeaderABC: - raise TypeError("Use APIs[version].FileHeader.unpack(stream)") - # _VERSION_MAP = None # TODO move to IO - # header_class = _FILE_HEADER_VERSION_MAP.get(version) - # - # if not header_class: - # raise NotImplementedError(version) - # - # return header_class.old_unpack(stream) - - -@dataclass -class FolderCollectionABC(FolderCollection): - sub_folders: List[Folder] - - -@dataclass -class FileCollectionABC(FileCollection): - files: List[File] - - -@dataclass -class FolderChildABC(FolderChild): - parent_folder: Optional[Folder] - - -@dataclass -class DriveChildABC(DriveChild): - parent_drive: Optional[VirtualDrive] - - -@dataclass -class DriveCollectionABC(DriveCollection): - drives: List[VirtualDrive] - - -@dataclass -class FolderABC(Folder, FolderCollectionABC, FileCollectionABC, FolderChildABC, DriveChildABC): - def walk(self) -> ArchiveWalk: - drive = self.parent_drive - yield drive, self, self.sub_folders, self.files - for folder in self.sub_folders: - for inner_walk in folder.walk(): - yield inner_walk - - header: FolderHeaderABC - name: str - - def __init__(self, header: FolderHeaderABC, name: str, sub_folders: List[FolderABC], files: List[FileABC], parent_folder: Optional[FolderABC] = None, drive: Optional[VirtualDriveABC] = None): - self.header = header - self.name = name - self.sub_folders = sub_folders - self.files = 
files - self.parent_drive = drive - self.parent_folder = parent_folder - - @property - def full_path(self) -> PurePosixPath: - if self.parent_drive: - return self.parent_drive.full_path / self.name - else: - return PurePosixPath(self.name) - - @classmethod - def create(cls, header: FolderHeaderABC) -> FolderABC: - name = None - folders = [None] * header.sub_folder_range.size - files = [None] * header.file_range.size - # noinspection PyTypeChecker - return FolderABC(header, name, folders, files) - - def load_toc(self, toc: ArchiveTableOfContentsABC): - self.load_folders(toc.folders) - self.load_files(toc.files) - self.load_name_from_lookup(toc.names) - - def load_name_from_lookup(self, name_lookup: Dict[int, str]): - self.name = name_lookup[self.header.name_offset] - - def load_folders(self, folders: List[FolderABC]): - if self.header.sub_folder_range.start < len(folders): - for folder_index in self.header.sub_folder_range: - sub_folder_index = folder_index - self.header.sub_folder_range.start - f = self.sub_folders[sub_folder_index] = folders[folder_index] - f.parent_folder = self - - def load_files(self, files: List[FileABC]): - if self.header.file_range.start < len(files): - for file_index in self.header.file_range: - sub_file_index = file_index - self.header.file_range.start - f = self.files[sub_file_index] = files[file_index] - f.parent_folder = self - - -@dataclass -class FolderHeaderABC: - LAYOUT: ClassVar[Struct] - - name_offset: int - sub_folder_range: ArchiveRange - file_range: ArchiveRange - - @classmethod - def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> 'FolderHeaderABC': - raise TypeError("Use APIs[version].FolderHeader.unpack(stream)") - # header_class = _FOLDER_HEADER_VERSION_MAP.get(version) - # - # if not header_class: - # raise NotImplementedError(version) - # - # return header_class.unpack(stream) - - def pack(self, stream: BinaryIO) -> int: - args = self.name_offset, self.sub_folder_range.start, self.sub_folder_range.end, \ - self.file_range.start, self.file_range.end - return self.LAYOUT.pack_stream(stream, *args) - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'FolderHeaderABC': - name_offset, sub_folder_start, sub_folder_end, file_start, file_end = cls.LAYOUT.unpack_stream(stream) - sub_folder_range = ArchiveRange(sub_folder_start, sub_folder_end) - file_range = ArchiveRange(file_start, file_end) - return cls(name_offset, sub_folder_range, file_range) - - -@dataclass -class VirtualDriveHeaderABC: - LAYOUT: ClassVar[Struct] - - path: str - name: str - - sub_folder_range: ArchiveRange - file_range: ArchiveRange - unk: bytes - - @classmethod - def old_unpack(cls, stream: BinaryIO, version: VersionLike) -> 'VirtualDriveHeaderABC': - raise TypeError("Use APIs[version].VirtualDriveHeader.unpack(stream)") - # header_class = _VIRTUAL_DRIVE_HEADER_VERSION_MAP.get(version) - # - # if not header_class: - # raise NotImplementedError(version) - # - # return header_class.unpack(stream) - - def pack(self, stream: BinaryIO) -> int: - args = self.path.encode("ascii"), self.name.encode("ascii"), self.sub_folder_range.start, self.sub_folder_range.end, \ - self.file_range.start, self.file_range.end, 0 - return self.LAYOUT.pack_stream(stream, *args) - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'VirtualDriveHeaderABC': - path, name, sub_folder_start, sub_folder_end, file_start, file_end, unk = cls.LAYOUT.unpack_stream(stream) - path, name = path.decode("ascii").rstrip("\00"), name.decode("ascii").rstrip("\00") - sub_folder_range = 
ArchiveRange(sub_folder_start, sub_folder_end) - file_range = ArchiveRange(file_start, file_end) - return cls(path, name, sub_folder_range, file_range, unk) - - -@dataclass -class VirtualDriveABC(FolderCollectionABC, FileCollectionABC): - header: VirtualDriveHeaderABC - - def __init__(self, header: VirtualDriveHeaderABC, sub_folders: List[FolderABC], files: List[FileABC]): - self.header = header - self.sub_folders = sub_folders - self.files = files - - @property - def path(self) -> str: - return self.header.path - - @property - def name(self) -> str: - return self.header.name - - def walk(self) -> ArchiveWalk: - yield self, None, self.sub_folders, self.files - for folder in self.sub_folders: - for inner_walk in folder.walk(): - yield inner_walk - - @property - def full_path(self) -> PurePosixPath: - return PurePosixPath(self.path + ":") - - @classmethod - def create(cls, header: VirtualDriveHeaderABC) -> VirtualDriveABC: - folders = [None] * header.sub_folder_range.size - files = [None] * header.file_range.size - # noinspection PyTypeChecker - return VirtualDriveABC(header, folders, files) - - def load_toc(self, toc: ArchiveTableOfContentsABC): - self.load_folders(toc.folders) - self.load_files(toc.files) - - def load_folders(self, folders: List[FolderABC]): - if self.header.sub_folder_range.start < len(folders): - for folder_index in self.header.sub_folder_range: - sub_folder_index = folder_index - self.header.sub_folder_range.start - f = self.sub_folders[sub_folder_index] = folders[folder_index] - f.parent_drive = self - - def load_files(self, files: List[FileABC]): - if self.header.file_range.start < len(files): - for file_index in self.header.file_range: - sub_file_index = file_index - self.header.file_range.start - f = self.files[sub_file_index] = files[file_index] - f.parent_drive = self - - def build_tree(self): - self.sub_folders = [f for f in self.sub_folders if not f.parent_folder] - self.files = [f for f in self.files if not f.parent_folder] - - -ArchiveTOC = ArchiveTableOfContentsABC -ArchiveToCPtrABC = ArchiveTableOfContentsPtrABC diff --git a/src/relic/sga/core.py b/src/relic/sga/core.py new file mode 100644 index 0000000..2d4e274 --- /dev/null +++ b/src/relic/sga/core.py @@ -0,0 +1,389 @@ +from __future__ import annotations + +import zlib +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from pathlib import PosixPath, PurePosixPath +from typing import ClassVar, BinaryIO, Optional, List, Type, Dict, Tuple + +from serialization_tools.ioutil import WindowPtr +from serialization_tools.magic import MagicWordIO +from serialization_tools.structx import Struct + +MagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) + + +class FileVerificationType(Enum): + None_ = 0 # unknown real values, assuming incremental + CRC = 1 # unknown real values, assuming incremental + CRCBlocks = 2 # unknown real values, assuming incremental + MD5Blocks = 3 # unknown real values, assuming incremental + SHA1Blocks = 4 # unknown real values, assuming incremental + + +class FileStorageType(Enum): + Store = 0 + StreamCompress = 1 # 16 + BufferCompress = 2 # 32 + + +@dataclass +class Version: + """ The Major Version; Relic refers to this as the 'Version' """ + major: int + """ The Minor Version; Relic refers to this as the 'Product' """ + minor: Optional[int] = 0 + + LAYOUT: ClassVar[Struct] = Struct("<2H") + + def __str__(self) -> str: + return f"Version {self.major}.{self.minor}" + + def __eq__(self, other): + if isinstance(other, Version): + return
self.major == other.major and self.minor == other.minor + else: + return super().__eq__(other) + + def __hash__(self): + # Realistically; Version will always be <256 + # But we could manually set it to something much bigger by accident; and that may cause collisions + return (self.major << 32) + self.minor # parenthesized; `<<` binds looser than `+` + + @classmethod + def unpack(cls, stream: BinaryIO): + layout: Struct = cls.LAYOUT + args = layout.unpack_stream(stream) + return cls(*args) + + def pack(self, stream: BinaryIO): + layout: Struct = self.LAYOUT + args = (self.major, self.minor) + return layout.pack_stream(stream, *args) + + def assert_version_matches(self, expected: Version): + if self != expected: + raise VersionMismatchError(self, expected) + + +def _print_mismatch(name: str, received, expected): + msg = f"Unexpected {name}" + if received or expected: + msg += ";" + if received: + msg += f" got `{str(received)}`" + if received and expected: + msg += "," + if expected: + msg += f" expected `{str(expected)}`" + return msg + "!" + + +class VersionMismatchError(Exception): + def __init__(self, version: Version = None, expected: Version = None): + self.version = version + self.expected = expected + + def __str__(self): + return _print_mismatch("Version", self.version, self.expected) + + +class Md5MismatchError(Exception): + def __init__(self, received: bytes = None, expected: bytes = None): + self.received = received + self.expected = expected + + def __str__(self): + return _print_mismatch("MD5", self.received, self.expected) + + +def _read_names_as_lookup(stream: BinaryIO, name_count_or_size: int, is_count: bool = True): + BUF_SIZE = 64 # stolen from archive reader + lookup = {} + offset = 0 + if not is_count: + buffer = stream.read(name_count_or_size) # size + names = [_.decode("ascii") for _ in buffer.split(b"\0")] + for name in names: + lookup[offset] = name + offset += len(name) + 1 + return lookup + else: + # THIS GETS COMPLICATED + start_pos = stream.tell() + current_name = b"" + # While we still need to read names + while len(lookup) < name_count_or_size: + # Read a partial buffer in + buffer = stream.read(BUF_SIZE) + if len(buffer) == 0: + raise Exception("Buffer ran out of data!") + # Try to do a fast separate on the null byte + enc_names = buffer.split(b"\0") + current_name += enc_names[0] + # Needs more data (no b"\0" was found) + if len(enc_names) == 1 and len(buffer) == BUF_SIZE: + continue + else: + # Handle [0] + lookup[offset] = current_name.decode("ascii") + offset += len(current_name) + 1 + current_name = b"" + # Handle [1,N] by seeking to offset and looping again + stream.seek(start_pos + offset) + continue + return lookup + + +@dataclass +class BlobPtrs: + header_pos: int + header_size: Optional[int] + data_pos: int + data_size: Optional[int] + + +@dataclass +class ToCPtrsABC: + vdrive_rel_pos: int + vdrive_count: int + folder_rel_pos: int + folder_count: int + file_rel_pos: int + file_count: int + name_rel_pos: int + name_count_or_size: int # meaning varies between versions + + LAYOUT: ClassVar[Struct] + """ Only 'counts' are uint16s """ + LAYOUT_UINT16: ClassVar = Struct(" bytes: + if self.size_in_archive == 0: + return b"" + else: + with WindowPtr(self.abs_data_pos, self.size_in_archive).stream_jump_to(stream) as window: + file_data = window.read() + if self.storage_type == FileStorageType.Store: + return file_data + elif self.storage_type in [FileStorageType.StreamCompress, FileStorageType.BufferCompress]: + return zlib.decompress(file_data) + else: + raise NotImplementedError(f"Reading a file stored
as `{self.storage_type}` is not supported!") + + +@dataclass +class FileMetaABC: + storage: FileStorageType + + +@dataclass +class FileABC: + name: str + meta: FileMetaABC + data: Optional[bytes] = None + sparse_info: Optional[FileSparseInfo] = None + + def read_data(self, stream: BinaryIO): + self.data = self.sparse_info.read(stream) + + +class ArchiveMetaABC: + ... # TODO + + +@dataclass +class ArchiveABC: + MAGIC: ClassVar = MagicWord + VERSION: ClassVar[Version] + name: str + meta: ArchiveMetaABC + drives: List[DriveABC] + + # header_size: int # Not required + # data_offset: int # Not required + + # header_offset: int + + TOC_PTRS: ClassVar[Type[ToCPtrsABC]] + VDRIVE_DEF: ClassVar[Type[DriveDefABC]] + FOLDER_DEF: ClassVar[Type[FolderDefABC]] + FILE_DEF: ClassVar[Type[FileDefABC]] + NAME_BUFFER_USES_COUNT: ClassVar[bool] = True + + @classmethod + def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: + raise NotImplementedError + + @classmethod + def _read_toc(cls, header_stream: BinaryIO, header_pos: int, toc_ptrs: ToCPtrsABC): + vdrive_stream = header_stream + vdrive_stream.seek(header_pos + toc_ptrs.vdrive_rel_pos) + vdrive_defs = [cls.VDRIVE_DEF.unpack(vdrive_stream) for _ in range(toc_ptrs.vdrive_count)] + + folders_stream = header_stream + folders_stream.seek(header_pos + toc_ptrs.folder_rel_pos) + folder_defs = [cls.FOLDER_DEF.unpack(folders_stream) for _ in range(toc_ptrs.folder_count)] + + files_stream = header_stream + files_stream.seek(header_pos + toc_ptrs.file_rel_pos) + file_defs = [cls.FILE_DEF.unpack(files_stream) for _ in range(toc_ptrs.file_count)] + + name_stream = header_stream + name_stream.seek(header_pos + toc_ptrs.name_rel_pos) + names = _read_names_as_lookup(name_stream, toc_ptrs.name_count_or_size, is_count=cls.NAME_BUFFER_USES_COUNT) + + return vdrive_defs, folder_defs, file_defs, names + + @classmethod + def _assemble_files(cls, file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): + raise NotImplementedError + + @classmethod + def _assemble_folders(cls, folder_defs: List[FolderDefABC], files: List[FileABC], names: Dict[int, str]): + folders: List[FolderABC] = [] + for f_def in folder_defs: + full_name = names[f_def.name_rel_pos] + if full_name != "": + name = str(PurePosixPath(full_name).parts[-1]) # hack to get last portion of pathed-name + else: + name = "" + folder = FolderABC(name, None, files[f_def.file_start:f_def.file_end + 1]) + folders.append(folder) + for f_def, folder in zip(folder_defs, folders): + folder.folders = folders[f_def.folder_start:f_def.folder_end + 1] + return folders + + @classmethod + def _assemble_drives(cls, drive_defs: List[DriveDefABC], folders: List[FolderABC]): + drives: List[DriveABC] = [] + for d_def in drive_defs: + folder = folders[d_def.folder_root] + drive = DriveABC(folder.folders, folder.files, d_def.alias, d_def.name) + drives.append(drive) + return drives + + @classmethod + def _assemble_hierarchy(cls, vdrive_defs: List[DriveDefABC], folder_defs: List[FolderDefABC], file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): + files = cls._assemble_files(file_defs, names, data_pos) + folders = cls._assemble_folders(folder_defs, files, names) + vdrives = cls._assemble_drives(vdrive_defs, folders) + return vdrives, folders, files + + @classmethod + def read(cls, stream: BinaryIO, sparse: bool = False): + cls.MAGIC.read_magic_word(stream) + archive_version = Version.unpack(stream) + archive_version.assert_version_matches(cls.VERSION) + name, meta, blob_ptrs, 
toc_ptrs = cls._unpack_meta(stream) + + # TOC Block + vdrive_defs, folder_defs, file_defs, names = cls._read_toc(stream, blob_ptrs.header_pos, toc_ptrs) + + vdrives, _, files = cls._assemble_hierarchy(vdrive_defs, folder_defs, file_defs, names, blob_ptrs.data_pos) + + if not sparse: + for file in files: + file.read_data(stream) + + return cls(name, meta, vdrives) + + # def walk(self) -> ArchiveWalk: + # for drive in self.drives: + # for _ in drive.walk(): + # yield _ diff --git a/src/relic/sga/v2.py b/src/relic/sga/v2.py index 35f5110..1cce002 100644 --- a/src/relic/sga/v2.py +++ b/src/relic/sga/v2.py @@ -1,151 +1,124 @@ from __future__ import annotations +import hashlib from dataclasses import dataclass -from enum import Enum -from typing import BinaryIO, Tuple, Type, ClassVar +from datetime import datetime, timezone +from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -from serialization_tools.ioutil import WindowPtr, Ptr +from serialization_tools.size import KiB from serialization_tools.structx import Struct -from relic.common import VersionLike -from relic.sga import abc_ -from relic.sga.abc_ import ArchiveHeaderABC, ArchiveABC, FileHeaderABC, FolderHeaderABC, VirtualDriveHeaderABC, ArchiveToCPtrABC, ArchiveTableOfContentsHeadersABC -from relic.sga.checksums import validate_md5_checksum -from relic.sga.common import ArchiveVersion -from relic.sga.vX import APIvX +from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, Md5MismatchError -version = ArchiveVersion.v2 +class _ToCPtrs(ToCPtrsABC): + LAYOUT = ToCPtrsABC.LAYOUT_UINT16 -class _V2: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar +class _DriveDef(DriveDefABC): + LAYOUT = DriveDefABC.LAYOUT_UINT16 -@dataclass -class ArchiveToCPtr(ArchiveToCPtrABC, _V2): - LAYOUT = Struct("< LH LH LH LH") - - -@dataclass -class ArchiveHeader(ArchiveHeaderABC, _V2): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET - LAYOUT = Struct(f"< 16s 128s 16s 2L") - # The eigen value is a guid? 
also knew that layout looked familiar - MD5_EIGENVALUES = (b"E01519D6-2DB7-4640-AF54-0A23319C56C3", b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF") - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - @classmethod - @property - def version(cls) -> VersionLike: - return ArchiveVersion.Dow - - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - csum_a, name, csum_b, toc_size, data_offset = cls.LAYOUT.unpack_stream(stream) - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=stream.tell(), size=toc_size) - data_ptr = WindowPtr(offset=data_offset, size=None) - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b)) - - def pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset - return self.LAYOUT.pack_stream(stream, *args) - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - - -class FileCompressionFlag(Enum): - # Compression flag is either 0 (Decompressed) or 16/32 which are both compressed - # Aside from 0; these appear to be the Window-Sizes for the Zlib Compression (In KibiBytes) - Decompressed = 0 - Compressed16 = 16 - Compressed32 = 32 - - def compressed(self) -> bool: - return self != FileCompressionFlag.Decompressed +class _FolderDef(FolderDefABC): + LAYOUT = FolderDefABC.LAYOUT_UINT16 @dataclass -class FileHeader(FileHeaderABC, _V2): - # name - LAYOUT = Struct(f"<5L") - compression_flag: FileCompressionFlag - - def __eq__(self, other): - return self.compression_flag == other.compression_flag and super().__eq__(other) +class FileDef(FileDefABC): + LAYOUT = Struct("<5I") @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeader: - name_offset, compression_flag_value, data_offset, decompressed_size, compressed_size = cls.LAYOUT.unpack_stream(stream) - compression_flag = FileCompressionFlag(compression_flag_value) - name_ptr = Ptr(name_offset) - data_ptr = WindowPtr(data_offset, compressed_size) - return cls(name_ptr, data_ptr, decompressed_size, compressed_size, compression_flag) + def unpack(cls, stream: BinaryIO): + name_rel_pos, storage_type_val_v2, data_rel_pos, length, store_length = cls.LAYOUT.unpack_stream(stream) + storage_type_map = {0: FileStorageType.Store, 16: FileStorageType.StreamCompress, 32: FileStorageType.BufferCompress} + storage_type = storage_type_map[storage_type_val_v2] + return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type) - def pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.compression_flag.value, self.data_sub_ptr.offset, self.decompressed_size, self.compressed_size) - @property - def compressed(self): - return self.compression_flag.compressed() +FileMeta = FileMetaABC +File = FileABC +Folder = FolderABC +Drive = DriveABC 
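A minimal round-trip sketch of the v2 storage flag handling in `FileDef.unpack` above (an illustration only, not part of the patch; assumes `FileStorageType` mirrors the enum in relic.sga.core, with the raw 16/32 flags being the zlib window sizes in KiB):

from enum import Enum


class FileStorageType(Enum):  # assumed to match relic.sga.core's enum
    Store = 0
    StreamCompress = 1  # raw v2 flag 16 (zlib window size, KiB)
    BufferCompress = 2  # raw v2 flag 32 (zlib window size, KiB)


# forward map used when unpacking; inverted map would be needed for packing
_V2_FLAG_TO_STORAGE = {0: FileStorageType.Store, 16: FileStorageType.StreamCompress, 32: FileStorageType.BufferCompress}
_STORAGE_TO_V2_FLAG = {storage: flag for flag, storage in _V2_FLAG_TO_STORAGE.items()}

assert _V2_FLAG_TO_STORAGE[32] is FileStorageType.BufferCompress
assert _STORAGE_TO_V2_FLAG[FileStorageType.Store] == 0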
+# class File(FileABC): +# meta: FileMeta -@dataclass -class FolderHeader(FolderHeaderABC, _V2): - LAYOUT = Struct("< L 4H") +# @dataclass +# class Folder(FolderABC): +# folders: List[Folder] +# files: List[File] +# +# +# class Drive(DriveABC): +# folders: List[Folder] +# files: List[File] @dataclass -class VirtualDriveHeader(VirtualDriveHeaderABC, _V2): - LAYOUT = Struct("< 64s 64s 4H 2s") - - -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - - -@dataclass(init=False) -class Archive(ArchiveABC, _V2): - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr - TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -# Class Aliases; don't need to be inherited -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC +class ArchiveMeta(ArchiveMetaABC): + file_md5: bytes + header_md5: bytes + blob_ptr: BlobPtrs # Cached for MD5 + FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" + HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" + + @staticmethod + def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): + _BUF_SIZE = 256 * KiB + hasher = hashlib.md5(eigen) + stream.seek(start) + if size is None: + while True: + buffer = stream.read(_BUF_SIZE) + hasher.update(buffer) + if len(buffer) != _BUF_SIZE: + break + else: + read = 0 + while read < size: + buffer = stream.read(min(_BUF_SIZE, size - read)) + read += len(buffer) + hasher.update(buffer) + md5 = bytes.fromhex(hasher.hexdigest()) + if md5 != expected: + raise Md5MismatchError(md5, expected) + + def validate_file_md5(self, stream: BinaryIO): + self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) + + def validate_header_md5(self, stream: BinaryIO): + self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) + + +class Archive(ArchiveABC): + meta: ArchiveMeta + # drives: List[Drive] # typing + + TOC_PTRS = _ToCPtrs + VDRIVE_DEF = _DriveDef + FOLDER_DEF = _FolderDef + FILE_DEF = FileDef + VERSION = Version(2) + META_PREFIX_LAYOUT = Struct("<16s 128s 16s 3I") + @classmethod + def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): + files = [] + for f_def in file_defs: + meta = FileMeta(f_def.storage_type) + sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) + file = File(names[f_def.name_rel_pos], meta, None, sparse) + files.append(file) + return files -class APIv2(APIvX, _V2): - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - ArchiveHeader = ArchiveHeader - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive + @classmethod + def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: + encoded_name: bytes + file_md5, encoded_name, header_md5, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) + decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") + assert RSV_1 == 1 + header_pos = stream.tell() + toc_ptrs = cls.TOC_PTRS.unpack(stream) 
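+        # Note: v2 keeps the ToC pointer block immediately after the meta prefix,
+        # so header_pos (captured above) is the base offset for every rel_pos in
+        # the ToC, and header_size bounds the window checked by validate_header_md5.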
+        blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None)
+        meta = ArchiveMeta(file_md5, header_md5, blob_ptrs)
+        return decoded_name, meta, blob_ptrs, toc_ptrs
diff --git a/src/relic/sga/v5.py b/src/relic/sga/v5.py
index 6485be6..9321548 100644
--- a/src/relic/sga/v5.py
+++ b/src/relic/sga/v5.py
@@ -1,157 +1,138 @@
 from __future__ import annotations
 
+import hashlib
 from dataclasses import dataclass
-from typing import BinaryIO, Tuple, ClassVar, Type
+from datetime import datetime, timezone
+from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional
 
-from serialization_tools.ioutil import Ptr, WindowPtr
+from serialization_tools.size import KiB
 from serialization_tools.structx import Struct
 
-from relic.common import VersionLike
-from relic.sga import v2
-from relic.sga.abc_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC
-from relic.sga.checksums import validate_md5_checksum
-from relic.sga.common import ArchiveVersion
-from relic.sga.v2 import ArchiveToCPtrABC
-from relic.sga import abc_
-from relic.sga.vX import APIvX
+from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, Md5MismatchError
 
-version = ArchiveVersion.v5
 
+class _ToCPtrs(ToCPtrsABC):
+    LAYOUT = ToCPtrsABC.LAYOUT_UINT16
 
-class _V5:
-    """Mixin to allow classes to add `version` from the module level to the class level"""
-    version = version  # classvar = modulevar  # THIS IS A COPY; NOT A REFERENCE!
-
-@dataclass
-class VirtualDriveHeader(VirtualDriveHeaderABC, _V5):
-    LAYOUT = Struct("< 64s 64s 4H 2s")
+class _DriveDef(DriveDefABC):
+    LAYOUT = DriveDefABC.LAYOUT_UINT16
 
-@dataclass
-class ArchiveToCPtr(ArchiveToCPtrABC, _V5):
-    LAYOUT = v2.ArchiveToCPtr.LAYOUT
+class _FolderDef(FolderDefABC):
+    LAYOUT = FolderDefABC.LAYOUT_UINT16
 
 @dataclass
-class FolderHeader(FolderHeaderABC, _V5):
-    LAYOUT = Struct("< L 4H")
-
-
-@dataclass
-class FileHeader(FileHeaderABC, _V5):
-    LAYOUT = Struct(f"<5L H")
-    unk_a: int
-    unk_b: int
-
-    @property
-    def compressed(self):
-        return self.compressed_size < self.decompressed_size
+class FileDef(FileDefABC):
+    LAYOUT = Struct("<5I 2B")
+    # v5 Specific data
+    modified: datetime  # Unix EPOCH
+    verification_type: FileVerificationType
 
     @classmethod
-    def unpack(cls, stream: BinaryIO) -> FileHeader:
-        name_off, data_off, comp_size, decomp_size, unk_a, unk_b = cls.LAYOUT.unpack_stream(stream)
-        # Name, File, Compressed, Decompressed, ???, ???
- name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b) - - def pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b) - - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) + def unpack(cls, stream: BinaryIO): + # print(stream.tell()) + name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val = cls.LAYOUT.unpack_stream(stream) + modified = datetime.fromtimestamp(modified_seconds, timezone.utc) + storage_type = FileStorageType(storage_type_val) + verification_type = FileVerificationType(verification_type_val) + return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type) @dataclass -class ArchiveHeader(ArchiveHeaderABC, _V5): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET, TOC_POS, RESERVED:1, RESERVED:0?, UNK??? - LAYOUT = Struct(f"< 16s 128s 16s 3L 3L") - # Copied from DowI, may be different; praying it isn't - # UGH THIER DIFFERENT! Or the way to calculate them is different - # First, let's try no eigen # (None, None) # HAH TROLLED MYSELF, forgot to conert checksum to hex - MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - unk: int - - # This may not mirror DowI one-to-one, until it's verified, it stays here - # noinspection DuplicatedCode - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - # return True - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - - @property - def version(self) -> VersionLike: - return ArchiveVersion.Dow2 - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': - csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream) - - assert rsv_1 == 1 - assert rsv_0 == 0 - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_offset) - - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk) - - def pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk - return self.LAYOUT.pack_stream(stream, *args) - +class FileMeta(FileMetaABC): + modified: datetime + verification: FileVerificationType + storage: FileStorageType -# noinspection DuplicatedCode -# Code is identical; but meaning is completely different; using _V5 instead of _V2 -class 
ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader +class File(FileABC): + meta: FileMeta -@dataclass(init=False) -class Archive(ArchiveABC, _V5): - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr - TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError +@dataclass +class Folder(FolderABC): + folders: List[Folder] + files: List[File] -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC +class Drive(DriveABC): + folders: List[Folder] + files: List[File] -# noinspection DuplicatedCode -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader +@dataclass +class ArchiveMeta(ArchiveMetaABC): + file_md5: bytes + header_md5: bytes + blob_ptr: BlobPtrs # Cached for MD5 + unk_a: int + FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" + HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" + + @staticmethod + def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): + _BUF_SIZE = 256 * KiB + hasher = hashlib.md5(eigen) + stream.seek(start) + if size is None: + while True: + buffer = stream.read(_BUF_SIZE) + hasher.update(buffer) + if len(buffer) != _BUF_SIZE: + break + else: + read = 0 + while read < size: + buffer = stream.read(min(_BUF_SIZE, size - read)) + read += len(buffer) + hasher.update(buffer) + md5 = bytes.fromhex(hasher.hexdigest()) + if md5 != expected: + raise Md5MismatchError(md5, expected) + + def validate_file_md5(self, stream: BinaryIO): + self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) + + def validate_header_md5(self, stream: BinaryIO): + self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) + + +class Archive(ArchiveABC): + meta: ArchiveMeta + drives: List[Drive] # typing + + TOC_PTRS = _ToCPtrs + VDRIVE_DEF = _DriveDef + FOLDER_DEF = _FolderDef + FILE_DEF = FileDef + VERSION = Version(5) + META_PREFIX_LAYOUT = Struct("<16s 128s 16s 6I") + + _UNIQUE_UNKS: ClassVar = set() # For Analysis + @classmethod + def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): + files = [] + for f_def in file_defs: + meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type) # TODO handle hash + sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) + file = File(names[f_def.name_rel_pos], meta, None, sparse) + files.append(file) + return files -class APIv5(APIvX, _V5): - ArchiveHeader = ArchiveHeader - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive + @classmethod + def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: + encoded_name: bytes + file_md5, encoded_name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = cls.META_PREFIX_LAYOUT.unpack_stream(stream) + decoded_name = 
encoded_name.decode("utf-16-le").rstrip("\0") + assert RSV_1 == 1, RSV_1 + assert RSV_0 == 0, RSV_0 + # header_pos = stream.tell() + stream.seek(header_pos) + toc_ptrs = cls.TOC_PTRS.unpack(stream) + blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) + meta = ArchiveMeta(file_md5, header_md5, blob_ptrs, unk_a) + cls._UNIQUE_UNKS.add(unk_a) + return decoded_name, meta, blob_ptrs, toc_ptrs diff --git a/src/relic/sga/v7.py b/src/relic/sga/v7.py index 6bdc184..e410070 100644 --- a/src/relic/sga/v7.py +++ b/src/relic/sga/v7.py @@ -1,191 +1,113 @@ from __future__ import annotations - from dataclasses import dataclass -from typing import BinaryIO, Tuple, ClassVar, Type, List, Dict +from datetime import datetime, timezone +from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -from serialization_tools.ioutil import Ptr, WindowPtr from serialization_tools.structx import Struct -from relic.common import VersionLike -from relic.sga import v2 -from relic.sga.abc_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC, ArchiveTableOfContentsABC, VirtualDriveABC, NameBufferABC -from relic.sga.checksums import validate_md5_checksum -from relic.sga.common import ArchiveVersion -from relic.sga.protocols import Archive, ArchiveWalk -from relic.sga.v2 import ArchiveToCPtrABC -from relic.sga import abc_ -from relic.sga.vX import APIvX - -version = ArchiveVersion.v7 - -""" -Format According to ArchiveViewer (CoH2 Mod tools) -Magic: b'Archive_' -Version: UInt16 -Product: UInt16 (I call this minor) -NiceName: bytes[128]/str[64] (utf-16-le) -Header Size: UInt32 -Data Offset : UInt32 -(cached position in file here) -ToC Rel Pos: UInt32 -ToC Count : index_size -Folder Rel Pos: UInt32 -Folder Count : index_size -File Rel Pos: UInt32 -File Count : index_size -Name Buffer Pos : UInt32 -Name Buffer Count/Size ??? : index_size -unk??? : uint32 -Block Size : Uint32 -~~~ -ToC - - -""" - - -class _V7: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar # THIS IS A COPY; NOT A REFERENCE! 
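The vX `FileDef` classes (v5 above, v7/v9 below) all decode `modified` the same way; a minimal self-contained sketch of that conversion, assuming the on-disk field really is plain Unix epoch seconds as the reader notes suggest:

from datetime import datetime, timezone

modified_seconds = 1_154_736_000  # hypothetical value from a FileDef
modified = datetime.fromtimestamp(modified_seconds, timezone.utc)
assert int(modified.timestamp()) == modified_seconds  # lossless round-trip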
+from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC -@dataclass -class VirtualDriveHeader(VirtualDriveHeaderABC, _V7): - LAYOUT = Struct("< 64s 64s 5L") +class _ToCPtrs(ToCPtrsABC): + LAYOUT = ToCPtrsABC.LAYOUT_UINT32 -@dataclass -class ArchiveToCPtr(ArchiveToCPtrABC, _V7): - LAYOUT = Struct("< 8I") +class _DriveDef(DriveDefABC): + LAYOUT = DriveDefABC.LAYOUT_UINT32 + + +class _FolderDef(FolderDefABC): + LAYOUT = FolderDefABC.LAYOUT_UINT32 @dataclass -class FolderHeader(FolderHeaderABC, _V7): - LAYOUT = Struct("< L 4I") +class FileDef(FileDefABC): + LAYOUT = Struct("<5I 2B 2I") + # v7 Specific data + modified: datetime # Unix EPOCH + verification_type: FileVerificationType + crc: int + hash_pos: int + + @classmethod + def unpack(cls, stream: BinaryIO): + # print(stream.tell()) + name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = cls.LAYOUT.unpack_stream(stream) + modified = datetime.fromtimestamp(modified_seconds, timezone.utc) + storage_type = FileStorageType(storage_type_val) + verification_type = FileVerificationType(verification_type_val) + return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) @dataclass -class FileHeader(FileHeaderABC, _V7): - LAYOUT = Struct(f"<5L BB 2L") - unk_a: int - unk_b: int - unk_c: int - unk_d: int +class FileMeta(FileMetaABC): + modified: datetime + verification: FileVerificationType + storage: FileStorageType + crc: int + hash: bytes - @property - def compressed(self): - return self.compressed_size < self.decompressed_size - @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeader: - name_off, data_off, comp_size, decomp_size, unk_a, unk_b1, unk_b2, unk_c, unk_d = cls.LAYOUT.unpack_stream(stream) - # Name, File, Compressed, Decompressed, ???, ??? 
- name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b1, unk_b2, unk_c, unk_d) +class File(FileABC): + meta: FileMeta - def pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b, self.unk_c, self.unk_d) - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) +@dataclass +class Folder(FolderABC): + folders: List[Folder] + files: List[File] + + +class Drive(DriveABC): + folders: List[Folder] + files: List[File] @dataclass -class ArchiveHeader(ArchiveHeaderABC, _V7): - LAYOUT = Struct("< 128s 3L") - LAYOUT_2 = Struct("< 2L") - TOC_HEADER_SIZE = ArchiveToCPtr.LAYOUT.size - toc_ptr: WindowPtr +class ArchiveMeta(ArchiveMetaABC): + LAYOUT: ClassVar = Struct("<2I") unk_a: int - block_size: int # IDK what this means - - # This may not mirror DowI one-to-one, until it's verified, it stays here - # noinspection DuplicatedCode - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - return True + block_size: int - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and (self.unk_a, self.block_size) == (other.unk_a, other.block_size) \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset + @classmethod + def unpack(cls, stream): + layout = cls.LAYOUT + args = layout.unpack_stream(stream) + return cls(*args) + + def pack(self, stream): + layout = self.LAYOUT + args = self.unk_a, self.block_size + return layout.pack_stream(stream, *args) + + +class Archive(ArchiveABC): + drives: List[Drive] # typing + TOC_PTRS = _ToCPtrs + VDRIVE_DEF = _DriveDef + FOLDER_DEF = _FolderDef + FILE_DEF = FileDef + VERSION = Version(7) + META_PREFIX_LAYOUT = Struct("<128s 3I") @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, unk_a, data_offset, rsv_1 = cls.LAYOUT.unpack_stream(stream) - toc_pos = stream.tell() - stream.seek(cls.TOC_HEADER_SIZE, 1) - toc_size, block_size = cls.LAYOUT_2.unpack_stream(stream) - - # assert toc_size == toc_size_2, (toc_size, toc_size_2) - assert rsv_1 == 1 - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr, data_ptr = WindowPtr(toc_pos, toc_size), WindowPtr(data_offset) - return cls(name, toc_ptr, data_ptr, unk_a,block_size) - - def pack(self, stream: BinaryIO) -> int: - name, toc_size, data_offset = self.name.encode("utf-16-le"), self.toc_ptr.size, self.data_ptr.offset - written = self.TOC_HEADER_SIZE # count - written += self.LAYOUT.pack_stream(stream, name, self.unk_a, data_offset, 1) - stream.seek(self.TOC_HEADER_SIZE, 1) # this will write \0 when seeking past files (unless python api/system api changes) - written += self.LAYOUT.pack_stream(stream, toc_size, self.unk_b) - return written - - -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - # NAME_BUFFER_CLS = NameBuffer - - -@dataclass(init=False) -class Archive(Archive, _V7): - header: ArchiveHeader - _sparse: bool - - def __init__(self, header: ArchiveHeader, drives: List[VirtualDriveABC], _sparse: bool): - self.header = header - self._sparse = _sparse - self.drives = drives - 
- # redefine function - walk = ArchiveABC.walk + def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): + files = [] + for f_def in file_defs: + meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash + sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) + file = File(names[f_def.name_rel_pos], meta, None, sparse) + files.append(file) + return files @classmethod - def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): - with header.toc_ptr.stream_jump_to(stream) as handle: - toc_ptr = ArchiveToCPtr.unpack(handle) - with header.toc_ptr.stream_jump_to(stream) as handle: - toc_headers = ArchiveTableOfContentsHeaders.unpack(handle, toc_ptr) - toc = ArchiveTableOfContentsABC.create(toc_headers) - - toc.load_toc() - toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up - if not sparse: - with header.data_ptr.stream_jump_to(stream) as handle: - toc.load_data(handle) - - return cls(header, toc.drives, sparse) - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC - - -class APIv7(APIvX, _V7): - ArchiveHeader = ArchiveHeader - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive + def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: + encoded_name: bytes + encoded_name, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) + decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") + assert RSV_1 == 1 + header_pos = stream.tell() + toc_ptrs = cls.TOC_PTRS.unpack(stream) + meta = ArchiveMeta.unpack(stream) + blob_ptrs = BlobPtrs(header_pos, None, data_pos, None) + return decoded_name, meta, blob_ptrs, toc_ptrs diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py index 45db809..3c3b91c 100644 --- a/src/relic/sga/v9.py +++ b/src/relic/sga/v9.py @@ -1,167 +1,105 @@ from __future__ import annotations - from dataclasses import dataclass -from typing import BinaryIO, Dict +from datetime import datetime, timezone +from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -from serialization_tools.ioutil import Ptr, WindowPtr from serialization_tools.structx import Struct -from relic.common import VersionLike -from relic.sga import abc_ -from relic.sga.abc_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC, NameBufferABC -from relic.sga.common import ArchiveVersion -from relic.sga.protocols import ArchiveHeader -from relic.sga.vX import APIvX +from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC -version = ArchiveVersion.v9 +class _ToCPtrs(ToCPtrsABC): + LAYOUT = ToCPtrsABC.LAYOUT_UINT32 -class _V9: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar +class _DriveDef(DriveDefABC): + LAYOUT = DriveDefABC.LAYOUT_UINT32 -@dataclass -class 
VirtualDriveHeader(VirtualDriveHeaderABC, _V9):
-    LAYOUT = Struct("< 64s 64s 4L 4s")
-
-@dataclass
-class ArchiveToCPtr(ArchiveToCPtrABC, _V9):
-    LAYOUT = Struct("< 8L")
+class _FolderDef(FolderDefABC):
+    LAYOUT = FolderDefABC.LAYOUT_UINT32
 
 @dataclass
-class FolderHeader(FolderHeaderABC, _V9):
-    LAYOUT = Struct("< 5L")
-
-
-@dataclass
-class FileHeader(FileHeaderABC, _V9):
-    LAYOUT = Struct("< 7L H L")
-    unk_a: int
-    unk_b: int
-    unk_c: int
-    unk_d: int  # 256?
-    unk_e: int
-
-    def __eq__(self, other):
-        return self.unk_a == other.unk_a and self.unk_b == other.unk_b and self.unk_c == other.unk_c and self.unk_d == other.unk_d and self.unk_e == other.unk_e and super().__eq__(other)
+class FileDef(FileDefABC):
+    LAYOUT = Struct("<2I Q 3I 2B I")
+    # v9 Specific data
+    modified: datetime  # Unix EPOCH
+    verification_type: FileVerificationType
+    crc: int
+    hash_pos: int
 
     @classmethod
-    def unpack(cls, stream: BinaryIO) -> FileHeader:
-        name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e = cls.LAYOUT.unpack_stream(stream)
-        # assert unk_a == 0, (unk_a, 0)
-        # assert unk_b == 0, (unk_b, 0)
-        # UNK_D is a new compression flag?!
-        # if comp_size != decomp_size:
-        #     assert unk_d in [256,512], ((comp_size, decomp_size), (unk_d, [256,512]), (name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e))
-        # Pulling stuff out of my ass; but dividing them by the max block size gets you 7, 6 respectively
-        # Name, File, Compressed, Decompressed, ???, ???
-        name_ptr = Ptr(name_off)
-        data_ptr = Ptr(data_off)
-        return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d, unk_e)
-
-    def pack(self, stream: BinaryIO) -> int:
-        args = self.name_sub_ptr.offset, self.unk_a, self.data_sub_ptr.offset, self.unk_b, self.compressed_size, self.decompressed_size, self.unk_c, self.unk_d, self.unk_e
-        return self.LAYOUT.pack_stream(stream, *args)
-
-    @property
-    def compressed(self):
-        return self.compressed_size < self.decompressed_size
+    def unpack(cls, stream: BinaryIO):
+        name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc = cls.LAYOUT.unpack_stream(stream)
+        modified = datetime.fromtimestamp(modified_seconds, timezone.utc)
+        storage_type = FileStorageType(storage_type_val)
+        verification_type = FileVerificationType(verification_type_val)
+        return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos)
 
 @dataclass
-class ArchiveHeader(ArchiveHeaderABC, _V9):
-    # name, TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK???
-
-    LAYOUT = Struct(f"<128s QL QQ L 256s")
-    toc_ptr: WindowPtr
-    data_ptr: WindowPtr
-
-    unk: bytes
+class FileMeta(FileMetaABC):
+    modified: datetime
+    verification: FileVerificationType
+    storage: FileStorageType
+    crc: int
+    hash: bytes
 
-    def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool:
-        """
-        Dawn of War III does not contain any checksums, and so will always return true.
- :param stream: Ignored - :param fast: Ignored - :param _assert: Ignored - :returns: True - """ - return True +class File(FileABC): + meta: FileMeta - @property - def version(self) -> VersionLike: - return ArchiveVersion.Dow3 - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, toc_pos, toc_size, data_pos, data_size, rsv_1, unk = cls.LAYOUT.unpack_stream(stream) +@dataclass +class Folder(FolderABC): + folders: List[Folder] + files: List[File] - assert rsv_1 == 1 - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_pos, size=data_size) - name = name.decode("utf-16-le").rstrip("\0") - return cls(name, toc_ptr, data_ptr, unk) +class Drive(DriveABC): + folders: List[Folder] + files: List[File] - def pack(self, stream: BinaryIO) -> int: - args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 1, self.unk - return self.LAYOUT.pack_stream(stream, *args) - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version +@dataclass +class ArchiveMeta(ArchiveMetaABC): + sha_256: bytes + unk_a: int + unk_b: int + block_size: int -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC +class Archive(ArchiveABC): + drives: List[Drive] # typing + TOC_PTRS = _ToCPtrs + VDRIVE_DEF = _DriveDef + FOLDER_DEF = _FolderDef + FILE_DEF = FileDef + VERSION = Version(9) + META_PREFIX_LAYOUT = Struct("<128s QIQQ I 256s") + META_POSTFIX_LAYOUT = Struct("<3I") + NAME_BUFFER_USES_COUNT = False + @classmethod + def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): + files = [] + for f_def in file_defs: + meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash + sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) + file = File(names[f_def.name_rel_pos], meta, None, sparse) + files.append(file) + return files -class NameBuffer(NameBufferABC): @classmethod - def unpack(cls, stream: BinaryIO, buffer_size: int) -> Dict[int, str]: - """ Dow III uses a 'buffer size' instead of a 'name count' to unpack names """ - buffer = stream.read(buffer_size) - parts = buffer.split(b"\0") - lookup = {} - offset = 0 - for name in parts: - lookup[offset] = name.decode("ascii") - offset += len(name) + 1 # +1 to account for b'\0' - return lookup - - -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - NAME_BUFFER_CLS = NameBuffer - - -@dataclass(init=False) -class Archive(ArchiveABC, _V9): - TOC_PTR_CLS = ArchiveToCPtr - TOC_HEADERS_CLS = ArchiveTableOfContentsHeaders - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -class APIv9(APIvX, _V9): - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - ArchiveHeader = ArchiveHeader - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive + def _unpack_meta(cls, 
stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]:
+        encoded_name: bytes
+        encoded_name, header_pos, header_size, data_pos, data_size, RSV_1, sha_256 = cls.META_PREFIX_LAYOUT.unpack_stream(stream)
+        decoded_name = encoded_name.decode("utf-16-le").rstrip("\0")
+        assert RSV_1 == 1, RSV_1
+        stream.seek(header_pos)
+        toc_ptrs = cls.TOC_PTRS.unpack(stream)
+        unk_a, unk_b, block_size = cls.META_POSTFIX_LAYOUT.unpack_stream(stream)
+        meta = ArchiveMeta(sha_256, unk_a, unk_b, block_size)
+        blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, data_size)
+        return decoded_name, meta, blob_ptrs, toc_ptrs
diff --git a/src/relic/sga/vX.py b/src/relic/sga/vX.py
index c6036b6..61ccdc9 100644
--- a/src/relic/sga/vX.py
+++ b/src/relic/sga/vX.py
@@ -1,10 +1,7 @@
-from __future__ import annotations
-
 from types import ModuleType
-from typing import Type, Protocol, ClassVar
+from typing import Type, Protocol
 
-from relic.sga import abc_, protocols
-from relic.sga.common import ArchiveVersion
+from relic.sga.core import ArchiveABC, Version
 
 
 class APIvX(Protocol):
@@ -13,22 +10,23 @@ class APIvX(Protocol):
     """
 
-    version:ClassVar[ArchiveVersion]
+
+    version: Version
     # Archive
-    Archive: Type[protocols.Archive]
-    ArchiveHeader: Type[protocols.ArchiveHeader]
-    # Table Of Contents
-    ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC]
-    ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC]
-    # Files
-    FileHeader: Type[abc_.FileHeaderABC]
-    File: Type[protocols.File]
-    # Folders
-    FolderHeader: Type[abc_.FolderHeaderABC]
-    Folder: Type[protocols.Folder]
-    # VDrive
-    VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC]
-    VirtualDrive: Type[protocols.VirtualDrive]
+    Archive: Type[ArchiveABC]
+    # ArchiveHeader: Type[protocols.ArchiveHeader]
+    # # Table Of Contents
+    # ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC]
+    # ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC]
+    # # Files
+    # FileHeader: Type[abc_.FileHeaderABC]
+    # File: Type[protocols.File]
+    # # Folders
+    # FolderHeader: Type[abc_.FolderHeaderABC]
+    # Folder: Type[protocols.Folder]
+    # # VDrive
+    # VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC]
+    # VirtualDrive: Type[protocols.VirtualDrive]
 
 
 """Modules implementing vX should define all of the following attributes"""
diff --git a/src/relic/sga_old/Archive Reader Notes.txt b/src/relic/sga_old/Archive Reader Notes.txt
new file mode 100644
index 0000000..b13d040
--- /dev/null
+++ b/src/relic/sga_old/Archive Reader Notes.txt
@@ -0,0 +1,88 @@
+According to the modding tools:
+
+Archive Headers follow this format:
+0x0-0x8 b'ARCHIVE_' ~ MAGIC (Nothing we didn't already know)
+0x8-0xA ~ VERSION
+0xA-0xC ~ PRODUCT (always `COH` which is 0), I will continue to call it Minor since 'product' would be implemented per Version
+
+The following is only valid for formats [4,9] (and when Product is 0)
+V[4,5]
+    bytes[16] ~ FileMD5
+V[Any]
+    str[128] "utf-16-le" ~ NAME
+V[4,5]
+    bytes[16] ~ HeaderMD5
+V[9]
+    UInt64 ~ toc_pos
+V[8]
+    UInt32 ~ toc_pos
+UInt32 ~ toc_size
+V[9]
+    UInt64 ~ data_pos
+    UInt64 ~ data_size
+V[8]
+    UInt32 ~ data_pos
+    UInt32 ~ data_size
+V[4,5,6,7]
+    UInt32 ~ data_pos
+UInt32 ~ ??? (RSV_1) # v[2] doesn't have this
+V[8,9]
+    bytes[256] ~ ???
+V[4,5,6,7]
+    stream.tell() ~ toc_pos
+
+# starting @ toc_pos
+uint32 ~ toc_data pos
+uint16/32 ~ toc_data count # v[4] uses uint16, v[5,6,7,8,9] use uint32
+uint32 ~ folder pos
+uint16/32 ~ folder count # v[4] uses uint16, v[5,6,7,8,9] use uint32
+uint32 ~ file pos
+uint16/32 ~ file count # v[4] uses uint16, v[5,6,7,8,9] use uint32
+uint32 ~ name pos
+uint16/32 ~ name count / name buffer size # Varies depending on version; but ArchiveReader doesn't use it # v[4] uses uint16, v[5,6,7,8,9] use uint32
+V[7,8,9]
+    uint32 ~ ???
+V[8,9]
+    uint32 ~ ???
+V[7,8,9]
+    uint32 ~ block size
+
+~~~ TocData
+alias : bytes[64]
+name : bytes[64]
+v[4]
+    uint16 : fold_start, fold_end, file_start, file_end, folder_root
+v[5,6,7,8,9]
+    uint32 : fold_start, fold_end, file_start, file_end, folder_root
+
+~~~ FolderData
+name offset : UInt32
+v[4]
+    uint16 : fold_start, fold_end, file_start, file_end
+v[5,6,7,8,9]
+    uint32 : fold_start, fold_end, file_start, file_end
+
+
+~~~ FileData
+name offset : UInt32
+v[8, 9]
+    uint32 : hash_offset # neat, DowIII has hashing
+v[4,5,6,7,8]
+    uint32 : data_offset
+v[9]
+    uint64 : data_offset
+length : uint32
+store length : uint32
+modified : uint32 # unix epoch?
+verification type : byte (FileVerificationType)
+storage type : byte (FileStorageType)
+v[6,7,8,9]
+    crc : uint32
+v[7]
+    hash_offset : uint32 # CoH2 also has offset, but it's at the end of the header?
+
+
+
+
+
+
diff --git a/src/relic/sga/__init__.py b/src/relic/sga_old/__init__.py
similarity index 75%
rename from src/relic/sga/__init__.py
rename to src/relic/sga_old/__init__.py
index 95f6b8b..d3d2a82 100644
--- a/src/relic/sga/__init__.py
+++ b/src/relic/sga_old/__init__.py
@@ -13,10 +13,10 @@
 # # __all__.extend(folder.__all__)
 # # __all__.extend(toc.__all__)
 # # __all__.extend(vdrive.__all__)
-from relic.sga.v2 import APIv2
-from relic.sga.v5 import APIv5
-from relic.sga.v7 import APIv7
-from relic.sga.v9 import APIv9
+from relic.sga_old.v2 import APIv2
+from relic.sga_old.v5 import APIv5
+from relic.sga_old.v7_old import APIv7
+from relic.sga_old.v9 import APIv9
 __APIS = [APIv2,APIv5,APIv7,APIv9]
 APIS = {api.version:api for api in __APIS}
\ No newline at end of file
diff --git a/src/relic/sga_old/archive.py b/src/relic/sga_old/archive.py
new file mode 100644
index 0000000..b223d99
--- /dev/null
+++ b/src/relic/sga_old/archive.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import BinaryIO, List, ClassVar, Any, Dict
+
+from serialization_tools.structx import Struct
+
+from relic.sga_old.abc_ import _VirtualDriveDefinition, VirtualDrive, Folder, FolderDefABC, File, _FileMeta, FileSparseInfo
+from relic.sga_old.common import FileStorageType, FileVerificationType, ArchiveVersion
+from relic.sga_old.protocols import ArchivePath
+from relic.sga_old.v7 import VirtualDriveDefinition, FolderDefinition
+
+"""
+index_size is UInt16 when version <= 4 else UInt32
+Format According to ArchiveViewer (CoH2 Mod tools)
+Magic: b'Archive_'
+Version: UInt16
+Product: UInt16 (I call this minor)
+NiceName: bytes[128]/str[64] (utf-16-le)
+Header Size: UInt32
+Data Pos : UInt32
+Header Pos : (cached position in file here)
+ToC Rel Pos: UInt32
+ToC Count : index_size
+Folder Rel Pos: UInt32
+Folder Count : index_size
+File Rel Pos: UInt32
+File Count : index_size
+Name Buffer Pos : UInt32
+Name Buffer Count/Size ??? : index_size
+unk???
: uint32 +Block Size : Uint32 +~~~ +ToC + + +""" +_UTF_NAME = "128s" + +version = ArchiveVersion(7) + + +class VirtualDriveDefinition_v4(_VirtualDriveDefinition): + LAYOUT = Struct("64s 64s 5H") + + +@dataclass +class FileMeta(_FileMeta): + modified: datetime + verification: FileVerificationType + storage: FileStorageType + crc: int + hash: bytes + + +class FolderDefinition_v4(FolderDefABC): + LAYOUT = Struct("I 4H") + + +@dataclass +class FileDefABC: + name_rel_pos: int + data_rel_pos: int + length: int + store_length: int + modified: datetime # Unix EPOCH + verification_type: FileVerificationType + storage_type: FileStorageType + crc: int + hash_pos: int + + LAYOUT: ClassVar[Struct] = Struct("=5I 2B 2I") + + @classmethod + def unpack(cls, stream: BinaryIO): + args: List[Any] = list(cls.LAYOUT.unpack_stream(stream)) + # args2 = Struct(f"<5L BB 2L").unpack_stream(stream) + # _arg4 = args[4] + args[4] = datetime.fromtimestamp(args[4], timezone.utc) + args[5] = FileVerificationType(args[5]) + args[6] = FileStorageType(args[6]) + return cls(*args) + + + + +@dataclass +class ArchiveMeta: + unk_a: int + block_size: int + + +# Archives consist of 3 'Blobs' + some Metadata +# Magic (Metadata) +# Version / Product (Metadata) +# Meta Blob ( ToC Ptrs / Header Ptr / Data Ptr / other Metadata) +# Header Blob +# ToC Header +# ToC Definitions +# Data Blob +# Raw Bytes for sub-files diff --git a/src/relic/sga/checksums.py b/src/relic/sga_old/checksums.py similarity index 100% rename from src/relic/sga/checksums.py rename to src/relic/sga_old/checksums.py diff --git a/src/relic/sga/common.py b/src/relic/sga_old/common.py similarity index 64% rename from src/relic/sga/common.py rename to src/relic/sga_old/common.py index b3f7743..21a0251 100644 --- a/src/relic/sga/common.py +++ b/src/relic/sga_old/common.py @@ -8,52 +8,43 @@ from serialization_tools.structx import Struct from relic.common import VersionEnum, Version, VersionLike -from relic.sga.protocols import ArchiveWalk, FileCollection, FolderCollection, DriveCollection, Folder, VirtualDrive +from relic.sga_old.protocols import ArchiveWalk, FileCollection, FolderCollection, DriveCollection, Folder, VirtualDrive ArchiveVersionLayout = Struct("< 2H") -class FileVerificationType(Enum): - None_ = 0 # unknown real values, assuming incremental - CRC = 1 # unknown real values, assuming incremental - CRCBlocks = 2 # unknown real values, assuming incremental - MD5Blocks = 3 # unknown real values, assuming incremental - SHA1Blocks = 4 # unknown real values, assuming incremental - -class FileStorageType(Enum): - Store = 0 - StreamCompress = 1 # 16 - BufferCompress = 2 # 32 - - -class ArchiveVersion(VersionEnum): - Unsupported = None - v2 = Version(2) - Dow = v2 - v5 = Version(5) - Dow2 = v5 - v7 = Version(7) - CoH2 = v7 - v9 = Version(9) - Dow3 = v9 - - @classmethod - def unpack_version(cls, stream: BinaryIO) -> Version: - return Version(*ArchiveVersionLayout.unpack_stream(stream)) - - @classmethod - def pack_version(cls, stream: BinaryIO, version: VersionLike) -> int: - if isinstance(version, VersionEnum): - version = version.value - return ArchiveVersionLayout.pack_stream(stream, version.major, version.minor) - - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveVersion: - return ArchiveVersion(cls.unpack_version(stream)) - - def pack(self, stream: BinaryIO) -> int: - return self.pack_version(stream, self) +class ArchiveVersion(Version): + LAYOUT = Version._16 + + +# class ArchiveVersion(VersionEnum): +# Unsupported = None +# v2 = Version(2) +# Dow = v2 
+# v5 = Version(5) +# Dow2 = v5 +# v7 = Version(7) +# CoH2 = v7 +# v9 = Version(9) +# Dow3 = v9 +# +# @classmethod +# def unpack_version(cls, stream: BinaryIO) -> Version: +# return Version(*ArchiveVersionLayout.unpack_stream(stream)) +# +# @classmethod +# def pack_version(cls, stream: BinaryIO, version: VersionLike) -> int: +# if isinstance(version, VersionEnum): +# version = version.value +# return ArchiveVersionLayout.pack_stream(stream, version.major, version.minor) +# +# @classmethod +# def unpack(cls, stream: BinaryIO) -> ArchiveVersion: +# return ArchiveVersion(cls.unpack_version(stream)) +# +# def pack(self, stream: BinaryIO) -> int: +# return self.pack_version(stream, self) @dataclass diff --git a/src/relic/sga/io.py b/src/relic/sga_old/io.py similarity index 85% rename from src/relic/sga/io.py rename to src/relic/sga_old/io.py index 6e73b28..f997574 100644 --- a/src/relic/sga/io.py +++ b/src/relic/sga_old/io.py @@ -3,10 +3,10 @@ from typing import Dict, Type, BinaryIO from relic.common import VersionLike -from relic.sga.vX import APIvX -from relic.sga.common import ArchiveMagicWord, ArchiveVersion -from relic.sga.protocols import ArchiveHeader, Archive -from relic import sga +from relic.sga_old.vX import APIvX +from relic.sga_old.common import ArchiveMagicWord, ArchiveVersion +from relic.sga_old.protocols import ArchiveHeader, Archive +from relic import sga_old def unpack_archive_header(versions: Dict[VersionLike, Type[ArchiveHeader]], stream: BinaryIO, read_magic: bool = True) -> ArchiveHeader: @@ -38,7 +38,7 @@ def pack_archive(archive: Archive, stream: BinaryIO, write_magic: bool = True) - def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Dict[VersionLike, APIvX] = None, *, validate: bool = True) -> Archive: - versions = sga.APIS if versions is None else versions + versions = sga_old.APIS if versions is None else versions ArchiveMagicWord.assert_magic_word(stream, True) version = ArchiveVersion.unpack_version(stream) api = versions[version] diff --git a/src/relic/sga/protocols.py b/src/relic/sga_old/protocols.py similarity index 96% rename from src/relic/sga/protocols.py rename to src/relic/sga_old/protocols.py index 712df52..30fa1fd 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga_old/protocols.py @@ -28,10 +28,10 @@ def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bo """ raise NotImplementedError - @classmethod - @property - def version(self) -> VersionLike: - raise NotImplementedError + # @classmethod + # @property + # def version(self) -> VersionLike: + # raise NotImplementedError @classmethod def unpack(cls, stream: BinaryIO) -> ArchiveHeader: diff --git a/src/relic/sga_old/v2.py b/src/relic/sga_old/v2.py new file mode 100644 index 0000000..8ad6504 --- /dev/null +++ b/src/relic/sga_old/v2.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import BinaryIO, Tuple, Type, ClassVar + +from serialization_tools.ioutil import WindowPtr, Ptr +from serialization_tools.structx import Struct + +from relic.common import VersionLike +from relic.sga_old import abc_old_ +from relic.sga_old.abc_old_ import ArchiveHeaderABC, ArchiveABC, FileHeaderABC, FolderHeaderABC, VirtualDriveHeaderABC, ArchiveToCPtrABC, ArchiveTableOfContentsHeadersABC +from relic.sga_old.checksums import validate_md5_checksum +from relic.sga_old.common import ArchiveVersion +from relic.sga_old.vX import APIvX + +version = None # ArchiveVersion.v2 + + +class _V2: + """Mixin to allow 
classes to add `version` from the module level to the class level""" + version = version # classvar = modulevar + + +@dataclass +class ArchiveToCPtr(ArchiveToCPtrABC, _V2): + LAYOUT = Struct("< LH LH LH LH") + + +@dataclass +class ArchiveHeader(ArchiveHeaderABC, _V2): + # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET + LAYOUT = Struct(f"< 16s 128s 16s 2L") + # The eigen value is a guid? also knew that layout looked familiar + MD5_EIGENVALUES = (b"E01519D6-2DB7-4640-AF54-0A23319C56C3", b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF") + toc_ptr: WindowPtr + checksums: Tuple[bytes, bytes] + + def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): + ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] + valid = True + indexes = (1,) if fast else (0, 1) + for i in indexes: + valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) + return valid + + # @classmethod + # @property + # def version(cls) -> VersionLike: + # return cls.v + + @classmethod + def unpack(cls, stream: BinaryIO) -> ArchiveHeader: + csum_a, name, csum_b, toc_size, data_offset = cls.LAYOUT.unpack_stream(stream) + + name = name.decode("utf-16-le").rstrip("\0") + toc_ptr = WindowPtr(offset=stream.tell(), size=toc_size) + data_ptr = WindowPtr(offset=data_offset, size=None) + return cls(name, toc_ptr, data_ptr, (csum_a, csum_b)) + + def pack(self, stream: BinaryIO) -> int: + args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset + return self.LAYOUT.pack_stream(stream, *args) + + def __eq__(self, other): + # TODO make issue to add equality to WindowPtr/Ptr + return self.name == other.name \ + and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ + and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ + and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] + + +class FileCompressionFlag(Enum): + # Compression flag is either 0 (Decompressed) or 16/32 which are both compressed + # Aside from 0; these appear to be the Window-Sizes for the Zlib Compression (In KibiBytes) + Decompressed = 0 + + Compressed16 = 16 + Compressed32 = 32 + + def compressed(self) -> bool: + return self != FileCompressionFlag.Decompressed + + +@dataclass +class FileHeader(FileHeaderABC, _V2): + # name + LAYOUT = Struct(f"<5L") + compression_flag: FileCompressionFlag + + def __eq__(self, other): + return self.compression_flag == other.compression_flag and super().__eq__(other) + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeader: + name_offset, compression_flag_value, data_offset, decompressed_size, compressed_size = cls.LAYOUT.unpack_stream(stream) + compression_flag = FileCompressionFlag(compression_flag_value) + name_ptr = Ptr(name_offset) + data_ptr = WindowPtr(data_offset, compressed_size) + return cls(name_ptr, data_ptr, decompressed_size, compressed_size, compression_flag) + + def pack(self, stream: BinaryIO) -> int: + return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.compression_flag.value, self.data_sub_ptr.offset, self.decompressed_size, self.compressed_size) + + @property + def compressed(self): + return self.compression_flag.compressed() + + +@dataclass +class FolderHeader(FolderHeaderABC, _V2): + LAYOUT = Struct("< L 4H") + + +@dataclass +class VirtualDriveHeader(VirtualDriveHeaderABC, _V2): + LAYOUT = Struct("< 64s 64s 4H 
2s") + + +class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): + VDRIVE_HEADER_CLS = VirtualDriveHeader + FOLDER_HEADER_CLS = FolderHeader + FILE_HEADER_CLS = FileHeader + + +@dataclass(init=False) +class Archive(ArchiveABC, _V2): + TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr + TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders + + def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: + raise NotImplementedError + + +# Class Aliases; don't need to be inherited +File = abc_.FileABC +Folder = abc_.FolderABC +VirtualDrive = abc_.VirtualDriveABC + + +class APIv2(APIvX, _V2): + ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders + ArchiveHeader = ArchiveHeader + FileHeader = FileHeader + FolderHeader = FolderHeader + VirtualDriveHeader = VirtualDriveHeader + Archive = Archive + ArchiveToCPtr = ArchiveToCPtr + File = File + Folder = Folder + VirtualDrive = VirtualDrive diff --git a/src/relic/sga_old/v5.py b/src/relic/sga_old/v5.py new file mode 100644 index 0000000..14e68e1 --- /dev/null +++ b/src/relic/sga_old/v5.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import BinaryIO, Tuple, ClassVar, Type + +from serialization_tools.ioutil import Ptr, WindowPtr +from serialization_tools.structx import Struct + +from relic.common import VersionLike +from relic.sga_old import v2 +from relic.sga_old.abc_old_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC +from relic.sga_old.checksums import validate_md5_checksum +from relic.sga_old.common import ArchiveVersion +from relic.sga_old.v2 import ArchiveToCPtrABC +from relic.sga_old import abc_old_ +from relic.sga_old.vX import APIvX + +version = None # ArchiveVersion.v5 + + +class _V5: + """Mixin to allow classes to add `version` from the module level to the class level""" + version = version # classvar = modulevar # THIS IS A COPY; NOT A REFERENCE! + + +@dataclass +class VirtualDriveHeader(VirtualDriveHeaderABC, _V5): + LAYOUT = Struct("< 64s 64s 4H 2s") + + +@dataclass +class ArchiveToCPtr(ArchiveToCPtrABC, _V5): + LAYOUT = v2.ArchiveToCPtr.LAYOUT + + +@dataclass +class FolderHeader(FolderHeaderABC, _V5): + LAYOUT = Struct("< L 4H") + + +@dataclass +class FileHeader(FileHeaderABC, _V5): + LAYOUT = Struct(f"<5L H") + unk_a: int + unk_b: int + + @property + def compressed(self): + return self.compressed_size < self.decompressed_size + + @classmethod + def unpack(cls, stream: BinaryIO) -> FileHeader: + name_off, data_off, comp_size, decomp_size, unk_a, unk_b = cls.LAYOUT.unpack_stream(stream) + # Name, File, Compressed, Decompressed, ???, ??? + name_ptr = Ptr(name_off) + data_ptr = Ptr(data_off) + return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b) + + def pack(self, stream: BinaryIO) -> int: + return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b) + + def __eq__(self, other): + return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) + + +@dataclass +class ArchiveHeader(ArchiveHeaderABC, _V5): + # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET, TOC_POS, RESERVED:1, RESERVED:0?, UNK??? + LAYOUT = Struct(f"< 16s 128s 16s 3L 3L") + # Copied from DowI, may be different; praying it isn't + # UGH THIER DIFFERENT! 
+    # First, let's try no eigen  # (None, None)  # HAH TROLLED MYSELF, forgot to convert checksum to hex
+    MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii"))
+    toc_ptr: WindowPtr
+    checksums: Tuple[bytes, bytes]
+    unk: int
+
+    # This may not mirror DowI one-to-one, until it's verified, it stays here
+    # noinspection DuplicatedCode
+    def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True):
+        # return True
+        ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr]
+        valid = True
+        indexes = (1,) if fast else (0, 1)
+        for i in indexes:
+            valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert)
+        return valid
+
+    def __eq__(self, other):
+        # TODO make issue to add equality to WindowPtr/Ptr
+        return self.name == other.name and self.unk == other.unk \
+               and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \
+               and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \
+               and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1]
+    #
+    # @property
+    # def version(self) -> VersionLike:
+    #     return ArchiveVersion.Dow2
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> 'ArchiveHeader':
+        csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream)
+
+        assert rsv_1 == 1
+        assert rsv_0 == 0
+
+        name = name.decode("utf-16-le").rstrip("\0")
+        toc_ptr = WindowPtr(offset=toc_pos, size=toc_size)
+        data_ptr = WindowPtr(offset=data_offset)
+
+        return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk)
+
+    def pack(self, stream: BinaryIO) -> int:
+        args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk
+        return self.LAYOUT.pack_stream(stream, *args)
+
+
+# noinspection DuplicatedCode
+# Code is identical; but meaning is completely different; using _V5 instead of _V2
+class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC):
+    VDRIVE_HEADER_CLS = VirtualDriveHeader
+    FOLDER_HEADER_CLS = FolderHeader
+    FILE_HEADER_CLS = FileHeader
+
+
+@dataclass(init=False)
+class Archive(ArchiveABC, _V5):
+    TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr
+    TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders
+
+    def pack(self, stream: BinaryIO, write_magic: bool = True) -> int:
+        raise NotImplementedError
+
+
+File = abc_old_.FileABC
+Folder = abc_old_.FolderABC
+VirtualDrive = abc_old_.VirtualDriveABC
+
+
+# noinspection DuplicatedCode
+class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC):
+    VDRIVE_HEADER_CLS = VirtualDriveHeader
+    FOLDER_HEADER_CLS = FolderHeader
+    FILE_HEADER_CLS = FileHeader
+
+
+class APIv5(APIvX, _V5):
+    ArchiveHeader = ArchiveHeader
+    ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders
+    FileHeader = FileHeader
+    FolderHeader = FolderHeader
+    VirtualDriveHeader = VirtualDriveHeader
+    Archive = Archive
+    ArchiveToCPtr = ArchiveToCPtr
+    File = File
+    Folder = Folder
+    VirtualDrive = VirtualDrive
diff --git a/src/relic/sga_old/v9.py b/src/relic/sga_old/v9.py
new file mode 100644
index 0000000..dab5d45
--- /dev/null
+++ b/src/relic/sga_old/v9.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import BinaryIO, Dict
+
+from serialization_tools.ioutil import Ptr, WindowPtr
+from serialization_tools.structx import Struct
+
+from relic.common import VersionLike
+from relic.sga_old import abc_old_
+from relic.sga_old.abc_old_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC, NameBufferABC
+from relic.sga_old.common import ArchiveVersion
+from relic.sga_old.protocols import ArchiveHeader
+from relic.sga_old.vX import APIvX
+
+version = None  # ArchiveVersion.v9
+
+
+class _V9:
+    """Mixin to allow classes to add `version` from the module level to the class level"""
+    version = version  # classvar = modulevar
+
+
+@dataclass
+class VirtualDriveHeader(VirtualDriveHeaderABC, _V9):
+    LAYOUT = Struct("< 64s 64s 4L 4s")
+
+
+@dataclass
+class ArchiveToCPtr(ArchiveToCPtrABC, _V9):
+    LAYOUT = Struct("< 8L")
+
+
+@dataclass
+class FolderHeader(FolderHeaderABC, _V9):
+    LAYOUT = Struct("< 5L")
+
+
+@dataclass
+class FileHeader(FileHeaderABC, _V9):
+    LAYOUT = Struct("< 7L H L")
+    unk_a: int
+    unk_b: int
+    unk_c: int
+    unk_d: int  # 256?
+    unk_e: int
+
+    def __eq__(self, other):
+        return self.unk_a == other.unk_a and self.unk_b == other.unk_b and self.unk_c == other.unk_c and self.unk_d == other.unk_d and self.unk_e == other.unk_e and super().__eq__(other)
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> FileHeader:
+        name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e = cls.LAYOUT.unpack_stream(stream)
+        # assert unk_a == 0, (unk_a, 0)
+        # assert unk_b == 0, (unk_b, 0)
+        # UNK_D is a new compression flag?!
+        # if comp_size != decomp_size:
+        #     assert unk_d in [256,512], ((comp_size, decomp_size), (unk_d, [256,512]), (name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e))
+        # Rough guess: dividing them by the max block size gets you 7, 6 respectively
+        # Name, ???, Data, ???, Compressed, Decompressed, ???, ???, ???
+        name_ptr = Ptr(name_off)
+        data_ptr = Ptr(data_off)
+        return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d, unk_e)
+
+    def pack(self, stream: BinaryIO) -> int:
+        args = self.name_sub_ptr.offset, self.unk_a, self.data_sub_ptr.offset, self.unk_b, self.compressed_size, self.decompressed_size, self.unk_c, self.unk_d, self.unk_e
+        return self.LAYOUT.pack_stream(stream, *args)
+
+    @property
+    def compressed(self):
+        return self.compressed_size < self.decompressed_size
+
+
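+# A quick sketch of the size heuristic above (illustrative values only; the
+# positional arguments mirror the order used in `unpack`):
+#
+#   hdr = FileHeader(Ptr(0), Ptr(0), 10, 10, 0, 0, 0, 0, 0)  # sizes equal
+#   assert not hdr.compressed  # equal sizes are treated as "not compressed"
+#
+# Unlike v2 there is no explicit compression flag here (unless unk_d turns out
+# to be one), so compression can only be inferred by comparing the two sizes.
+
+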
+@dataclass
+class ArchiveHeader(ArchiveHeaderABC, _V9):
+    # name, TOC_POS (u64), TOC_SIZE (u32), DATA_POS (u64), DATA_SIZE (u64), RESERVED:1, UNK??? (256 bytes)
+
+    LAYOUT = Struct(f"<128s QL QQ L 256s")
+    toc_ptr: WindowPtr
+    data_ptr: WindowPtr
+
+    unk: bytes
+
+    def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool:
+        """
+        Dawn of War III does not contain any checksums, and so will always return true.
+
+        :param stream: Ignored
+        :param fast: Ignored
+        :param _assert: Ignored
+        :returns: True
+        """
+        return True
+
+    # @property
+    # def version(self) -> VersionLike:
+    #     return ArchiveVersion.Dow3
+
+    @classmethod
+    def unpack(cls, stream: BinaryIO) -> ArchiveHeader:
+        name, toc_pos, toc_size, data_pos, data_size, rsv_1, unk = cls.LAYOUT.unpack_stream(stream)
+
+        assert rsv_1 == 1
+        toc_ptr = WindowPtr(offset=toc_pos, size=toc_size)
+        data_ptr = WindowPtr(offset=data_pos, size=data_size)
+        name = name.decode("utf-16-le").rstrip("\0")
+
+        return cls(name, toc_ptr, data_ptr, unk)
+
+    def pack(self, stream: BinaryIO) -> int:
+        args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 1, self.unk
+        return self.LAYOUT.pack_stream(stream, *args)
+
+    def __eq__(self, other):
+        # TODO make issue to add equality to WindowPtr/Ptr
+        return self.name == other.name and self.unk == other.unk \
+               and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \
+               and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \
+               and self.version == other.version
+
+
+File = abc_old_.FileABC
+Folder = abc_old_.FolderABC
+VirtualDrive = abc_old_.VirtualDriveABC
+
+
+class NameBuffer(NameBufferABC):
+    @classmethod
+    def unpack(cls, stream: BinaryIO, buffer_size: int) -> Dict[int, str]:
+        """Dow III uses a 'buffer size' instead of a 'name count' to unpack names"""
+        buffer = stream.read(buffer_size)
+        parts = buffer.split(b"\0")
+        lookup = {}
+        offset = 0
+        for name in parts:
+            lookup[offset] = name.decode("ascii")
+            offset += len(name) + 1  # +1 to account for b'\0'
+        return lookup
+
+
+class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC):
+    VDRIVE_HEADER_CLS = VirtualDriveHeader
+    FOLDER_HEADER_CLS = FolderHeader
+    FILE_HEADER_CLS = FileHeader
+    NAME_BUFFER_CLS = NameBuffer
+
+
+@dataclass(init=False)
+class Archive(ArchiveABC, _V9):
+    TOC_PTR_CLS = ArchiveToCPtr
+    TOC_HEADERS_CLS = ArchiveTableOfContentsHeaders
+
+    def pack(self, stream: BinaryIO, write_magic: bool = True) -> int:
+        raise NotImplementedError
+
+
+class APIv9(APIvX, _V9):
+    ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders
+    ArchiveHeader = ArchiveHeader
+    FileHeader = FileHeader
+    FolderHeader = FolderHeader
+    VirtualDriveHeader = VirtualDriveHeader
+    Archive = Archive
+    ArchiveToCPtr = ArchiveToCPtr
+    File = File
+    Folder = Folder
+    VirtualDrive = VirtualDrive
diff --git a/src/relic/sga_old/vX.py b/src/relic/sga_old/vX.py
new file mode 100644
index 0000000..2edf2d9
--- /dev/null
+++ b/src/relic/sga_old/vX.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from types import ModuleType
+from typing import Type, Protocol, ClassVar
+
+from relic.sga_old import abc_old_, protocols
+from relic.sga_old.common import ArchiveVersion
+
+
+class APIvX(Protocol):
+    """
+    Allows us to have a TYPED OBJECT with required types for each version
+
+    """
+
+    version: ClassVar[ArchiveVersion]
+    # Archive
+    Archive: Type[protocols.Archive]
+    ArchiveHeader: Type[protocols.ArchiveHeader]
+    # Table Of Contents
+    ArchiveToCPtr: Type[abc_old_.ArchiveToCPtrABC]
+    ArchiveTableOfContentsHeaders: Type[abc_old_.ArchiveTableOfContentsHeadersABC]
+    # Files
+    FileHeader: Type[abc_old_.FileHeaderABC]
+    File: Type[protocols.File]
+    # Folders
+    FolderHeader: Type[abc_old_.FolderHeaderABC]
+    Folder: Type[protocols.Folder]
+    # VDrive
+    VirtualDriveHeader: Type[abc_old_.VirtualDriveHeaderABC]
+    VirtualDrive: Type[protocols.VirtualDrive]
+
+
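+# Example (a sketch): checking a candidate version module against this
+# protocol with `is_module_api`, defined below; `v2` stands in for any
+# version module, and `stream` is a hypothetical open binary stream:
+#
+#   from relic.sga_old import v2, vX
+#
+#   if vX.is_module_api(v2):
+#       header = v2.ArchiveHeader.unpack(stream)  # look up the version's types by name
+#
+# Note: `is_module_api` only checks that the attributes exist; it does not
+# verify that their values match the annotated types.
+
+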
+"""Modules implementing vX should define all of the following attributes""" +required_attrs = APIvX.__annotations__.keys() + + +def is_module_api(module: ModuleType): + return all(hasattr(module, attr) for attr in required_attrs) diff --git a/src/relic/sga/writer.py b/src/relic/sga_old/writer.py similarity index 100% rename from src/relic/sga/writer.py rename to src/relic/sga_old/writer.py diff --git a/src/scripts/dump_sga.py b/src/scripts/dump_sga.py index c96c84c..8dd0638 100644 --- a/src/scripts/dump_sga.py +++ b/src/scripts/dump_sga.py @@ -5,11 +5,11 @@ from serialization_tools.walkutil import BlackList, WhiteList, filter_by_path, filter_by_file_extension, collapse_walk_on_files -import relic.sga.common -import relic.sga.io +import relic.sga_old.common +import relic.sga_old.io from relic.config import DowIIIGame, DowIIGame, DowGame, filter_latest_dow_game, get_dow_root_directories -from relic.sga.common.archive import ArchiveMagicWord, Archive +from relic.sga_old.common.archive import ArchiveMagicWord, Archive def __safe_makedirs(path: str, use_dirname: bool = True): diff --git a/src/scripts/universal/sga/common.py b/src/scripts/universal/sga/common.py index 9176f75..3c9484a 100644 --- a/src/scripts/universal/sga/common.py +++ b/src/scripts/universal/sga/common.py @@ -5,7 +5,7 @@ from serialization_tools.walkutil import blacklisted -from relic.sga.common import ArchiveMagicWord +from relic.sga_old.common import ArchiveMagicWord from scripts.universal.common import print_error, print_wrote, print_reading, PrintOptions, SharedExtractorParser SharedSgaParser = argparse.ArgumentParser(parents=[SharedExtractorParser], add_help=False) diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py index 5becaa1..425bff4 100644 --- a/src/scripts/universal/sga/unpack.py +++ b/src/scripts/universal/sga/unpack.py @@ -3,8 +3,8 @@ from pathlib import Path from typing import Dict -import relic.sga.common -import relic.sga.io +import relic.sga_old.common +import relic.sga_old.io from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser from scripts.universal.sga.common import get_runner diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py index 7b6f8b7..de90e4a 100644 --- a/tests/relic/sga/archive/test_archive.py +++ b/tests/relic/sga/archive/test_archive.py @@ -3,9 +3,9 @@ import pytest -from relic.sga import protocols -from relic.sga.abc_ import ArchiveABC -from relic.sga.protocols import ArchiveWalk +from relic.sga_old import protocols +from relic.sga_old.abc_old_ import ArchiveABC +from relic.sga_old.protocols import ArchiveWalk from tests.helpers import TF from tests.relic.sga.datagen import DowII, DowI, DowIII diff --git a/tests/relic/sga/archive/test_archive_header.py b/tests/relic/sga/archive/test_archive_header.py index 6e6d530..4547c85 100644 --- a/tests/relic/sga/archive/test_archive_header.py +++ b/tests/relic/sga/archive/test_archive_header.py @@ -7,9 +7,9 @@ from serialization_tools.size import KiB, MiB, GiB from relic.common import Version -from relic.sga import protocols as proto, v2, v5, v9 -from relic.sga.checksums import gen_md5_checksum, validate_md5_checksum -from relic.sga.common import ArchiveVersion +from relic.sga_old import protocols as proto, v2, v5, v9 +from relic.sga_old.checksums import gen_md5_checksum, validate_md5_checksum +from relic.sga_old.common import ArchiveVersion from tests.helpers import TF from tests.relic.sga.datagen import DowI, DowII, DowIII @@ -124,7 
+124,7 @@ def test_pack(self, inst: proto.ArchiveHeader, expected: bytes):
     def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader):
         super().test_unpack(buffer, expected)
 
-    @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, ArchiveVersion.Dow)])
+    @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, v2.version)])
     def test_version(self, archive: proto.ArchiveHeader, expected: Version):
         super().test_version(archive, expected)
 
@@ -162,7 +162,7 @@ def test_pack(self, inst: proto.ArchiveHeader, expected: bytes):
     def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]):
         super().test_validate_checksums(archive, cls)
 
-    @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, ArchiveVersion.Dow2)])
+    @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, v5.version)])
     def test_version(self, archive: proto.ArchiveHeader, expected: Version):
         super().test_version(archive, expected)
 
@@ -198,6 +198,6 @@ def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader):
     def test_pack(self, inst: proto.ArchiveHeader, expected: bytes):
         super().test_pack(inst, expected)
 
-    @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, ArchiveVersion.Dow3)])
+    @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, v9.version)])
     def test_version(self, archive: proto.ArchiveHeader, expected: Version):
         super().test_version(archive, expected)
diff --git a/tests/relic/sga/datagen.py b/tests/relic/sga/datagen.py
index 57848c6..29ac4cb 100644
--- a/tests/relic/sga/datagen.py
+++ b/tests/relic/sga/datagen.py
@@ -3,10 +3,10 @@
 
 from serialization_tools.ioutil import WindowPtr, Ptr
 
-from relic.sga.protocols import ArchiveHeader
-from relic.sga.abc_ import FileABC, FolderABC, VirtualDriveABC, ArchiveTOC
-from relic.sga import v2, v5, v9
-from relic.sga.common import ArchiveRange
+from relic.sga_old.protocols import ArchiveHeader
+from relic.sga_old.abc_old_ import FileABC, FolderABC, VirtualDriveABC, ArchiveTOC
+from relic.sga_old import v2, v5, v9
+from relic.sga_old.common import ArchiveRange
 
 
 def encode_and_pad(v: str, byte_size: int, encoding: str) -> bytes:
diff --git a/tests/relic/sga/file/test_file_header.py b/tests/relic/sga/file/test_file_header.py
index ebf800f..d18ca15 100644
--- a/tests/relic/sga/file/test_file_header.py
+++ b/tests/relic/sga/file/test_file_header.py
@@ -4,8 +4,8 @@
 import pytest
 
 from relic.common import VersionLike
-from relic.sga.common import ArchiveVersion
-from relic.sga.abc_ import FileHeaderABC
+from relic.sga_old.common import ArchiveVersion
+from relic.sga_old.abc_old_ import FileHeaderABC
 
 from tests.relic.sga.datagen import DowI, DowII, DowIII
 
diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py
index 0e56ab6..18b5acc 100644
--- a/tests/relic/sga/test_vX_interface.py
+++ b/tests/relic/sga/test_vX_interface.py
@@ -1,10 +1,10 @@
 from typing import Iterable, List, Tuple
 
-import relic.sga
-from relic.sga import v2, v5, v9, vX
+import relic.sga_old
+from relic.sga_old import v2, v5, archive, v7_old, v9, vX
 import pytest
 
-MODULES = [v2, v5, v9]
+MODULES = [v2, v5, v7_old, v9]
 
 ATTRS = vX.required_attrs
 APIS = relic.sga.APIS.values()

From 2ca3467c4f1fa757a2a5cd453c586102e3f24471 Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Sat, 11 Jun 2022 14:15:13 -0800
Subject: [PATCH 08/19] Drop old sga

---
 src/relic/sga_old/Archive Reader Notes.txt |  88 ------
 src/relic/sga_old/__init__.py              |  22 --
 src/relic/sga_old/archive.py               | 102 -------
 src/relic/sga_old/checksums.py             | 
25 -- src/relic/sga_old/common.py | 101 ------- src/relic/sga_old/io.py | 50 ---- src/relic/sga_old/protocols.py | 129 --------- src/relic/sga_old/v2.py | 151 ----------- src/relic/sga_old/v5.py | 157 ----------- src/relic/sga_old/v9.py | 167 ------------ src/relic/sga_old/vX.py | 39 --- src/relic/sga_old/writer.py | 297 --------------------- 12 files changed, 1328 deletions(-) delete mode 100644 src/relic/sga_old/Archive Reader Notes.txt delete mode 100644 src/relic/sga_old/__init__.py delete mode 100644 src/relic/sga_old/archive.py delete mode 100644 src/relic/sga_old/checksums.py delete mode 100644 src/relic/sga_old/common.py delete mode 100644 src/relic/sga_old/io.py delete mode 100644 src/relic/sga_old/protocols.py delete mode 100644 src/relic/sga_old/v2.py delete mode 100644 src/relic/sga_old/v5.py delete mode 100644 src/relic/sga_old/v9.py delete mode 100644 src/relic/sga_old/vX.py delete mode 100644 src/relic/sga_old/writer.py diff --git a/src/relic/sga_old/Archive Reader Notes.txt b/src/relic/sga_old/Archive Reader Notes.txt deleted file mode 100644 index b13d040..0000000 --- a/src/relic/sga_old/Archive Reader Notes.txt +++ /dev/null @@ -1,88 +0,0 @@ -According to the modding tools: - -Archive Headers follow this format: -0x0-0x8 b'ARCHIVE_` ~ MAGIC (Nothing we didn't already know) -0x8-0x12 ~ VERSION -0x12-0x16 ~ PRODUCT (always `COH` which is 0), I will continue to call it Minor since 'product' would be implemented per Version - -The following is only valid for formats [4,9] (and when Product is 0) -V[4,5] - bytes[16] ~ FileMD5 -V[Any] - str[128] "utf-16-le" ~ NAME -V[4,5] - bytes[16] ~ HeaderMD5 -V[9] - UInt64 ~ toc_pos -V[8] - UInt32 ~ toc_pos -UInt32 ~ toc_size -V[9] - UInt64 ~ data_pos - Uint64 ~ data_size -V[8] - UInt32 ~ data_pos - Uint32 ~ data_size -V[4,5,6,7] - UInt32 ~ data_pos -UInt32 ~ ??? (RSV_1) # v[2] doesn't have this -V[8,9] - bytes[256] ~ ??? -V[4,5,6,7] - stream.tell() ~ toc_pos - -# starting @ toc_pos -uint32 ~ toc_data pos -uint16/32 ~ toc_data count # v[4] uses uint16, v[5,6,7,8,9] use uint32 -uint32 ~ folder pos -uint16/32 ~ folder count # v[4] uses uint16, v[5,6,7,8,9] use uint32 -uint32 ~ file pos -uint16/32 ~ file count # v[4] uses uint16, v[5,6,7,8,9] use uint32 -uint32 ~ name pos -uint16/32 ~ name count / name buffer size # Varies depending on version; but ArchiveReader doens't use it # v[4] uses uint16, v[5,6,7,8,9] use uint32 -V[7,8,9] - uint32 ~ ??? -V[8,9] - uint32 ~ ??? -V[7,8,9] - uint32 ~ block size - -~~~ TocData -alias : bytes[64] -name : bytes[64] -v[4] - uint16 : fold_start, fold_end, file_Start, file_end, folder_root -v[5,6,7,8,9] - uint32 : fold_start, fold_end, file_Start, file_end, folder_root - -~~~ FolderData -name offset : Uint32 -v[4] - uint16 : fold_start, fold_end, file_Start, file_end -v[5,6,7,8,9] - uint32 : fold_start, fold_end, file_Start, file_end - - -~~~ FileData -name offset : Uint32 -v[8, 9] - uint32 : hash_offset # neat, DowIII has hashing -v[4,5,6,7,8] - uint32 : data_offset -v[9] - uint64 : data_offset -length : uint32 -store length : uint32 -modified : uint32 # unix epoch? -verification type : byte (FileVerificationType) -storage type : byte (FileStorageType) -v[6,7,8,9] - crc : uin32 -v[7] - hash_offset : uint32 # CoH2 also has offset, but it's at the end of the header? - - - - - - diff --git a/src/relic/sga_old/__init__.py b/src/relic/sga_old/__init__.py deleted file mode 100644 index d3d2a82..0000000 --- a/src/relic/sga_old/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# from . 
import common, hierarchy, writer -# # from .common import vdrive, archive, folder, file, toc -# -# # __all__ = [ -# # "common", -# # "hierarchy", -# # "writer", -# # ] -# -# # __all__.extend(archive.__all__) -# # __all__.extend(file.__all__) -# # __all__.extend(folder.__all__) -# # __all__.extend(toc.__all__) -# # __all__.extend(vdrive.__all__) -from relic.sga_old.v2 import APIv2 -from relic.sga_old.v5 import APIv5 -from relic.sga_old.v7_old import APIv7 -from relic.sga_old.v9 import APIv9 - -__APIS = [APIv2,APIv5,APIv7,APIv9] -APIS = {api.version:api for api in __APIS} \ No newline at end of file diff --git a/src/relic/sga_old/archive.py b/src/relic/sga_old/archive.py deleted file mode 100644 index b223d99..0000000 --- a/src/relic/sga_old/archive.py +++ /dev/null @@ -1,102 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from datetime import datetime, timezone -from typing import BinaryIO, List, ClassVar, Any, Dict - -from serialization_tools.structx import Struct - -from relic.sga_old.abc_ import _VirtualDriveDefinition, VirtualDrive, Folder, FolderDefABC, File, _FileMeta, FileSparseInfo -from relic.sga_old.common import FileStorageType, FileVerificationType, ArchiveVersion -from relic.sga_old.protocols import ArchivePath -from relic.sga_old.v7 import VirtualDriveDefinition, FolderDefinition - -""" -index_size is UInt16 when version <= 4 else Uint32 -Format According to ArchiveViewer (CoH2 Mod tools) -Magic: b'Archive_' -Version: UInt16 -Product: UInt16 (I call this minor) -NiceName: bytes[128]/str[64] (utf-16-le) -Header Size: UInt32 -Data Pos : UInt32 -Header Pos : (cached position in file here) -ToC Rel Pos: UInt32 -ToC Count : index_size -Folder Rel Pos: UInt32 -Folder Count : index_size -File Rel Pos: UInt32 -File Count : index_size -Name Buffer Pos : UInt32 -Name Buffer Count/Size ??? : index_size -unk??? 
: uint32 -Block Size : Uint32 -~~~ -ToC - - -""" -_UTF_NAME = "128s" - -version = ArchiveVersion(7) - - -class VirtualDriveDefinition_v4(_VirtualDriveDefinition): - LAYOUT = Struct("64s 64s 5H") - - -@dataclass -class FileMeta(_FileMeta): - modified: datetime - verification: FileVerificationType - storage: FileStorageType - crc: int - hash: bytes - - -class FolderDefinition_v4(FolderDefABC): - LAYOUT = Struct("I 4H") - - -@dataclass -class FileDefABC: - name_rel_pos: int - data_rel_pos: int - length: int - store_length: int - modified: datetime # Unix EPOCH - verification_type: FileVerificationType - storage_type: FileStorageType - crc: int - hash_pos: int - - LAYOUT: ClassVar[Struct] = Struct("=5I 2B 2I") - - @classmethod - def unpack(cls, stream: BinaryIO): - args: List[Any] = list(cls.LAYOUT.unpack_stream(stream)) - # args2 = Struct(f"<5L BB 2L").unpack_stream(stream) - # _arg4 = args[4] - args[4] = datetime.fromtimestamp(args[4], timezone.utc) - args[5] = FileVerificationType(args[5]) - args[6] = FileStorageType(args[6]) - return cls(*args) - - - - -@dataclass -class ArchiveMeta: - unk_a: int - block_size: int - - -# Archives consist of 3 'Blobs' + some Metadata -# Magic (Metadata) -# Version / Product (Metadata) -# Meta Blob ( ToC Ptrs / Header Ptr / Data Ptr / other Metadata) -# Header Blob -# ToC Header -# ToC Definitions -# Data Blob -# Raw Bytes for sub-files diff --git a/src/relic/sga_old/checksums.py b/src/relic/sga_old/checksums.py deleted file mode 100644 index 22b9db9..0000000 --- a/src/relic/sga_old/checksums.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -from hashlib import md5 -from typing import BinaryIO - -from serialization_tools.ioutil import Ptr, StreamPtr, iter_read, WindowPtr -from serialization_tools.size import KiB - - -def gen_md5_checksum(stream: BinaryIO, eigen: bytes, buffer_size: int = 64 * KiB, ptr: Ptr = None) -> bytes: - hasher = md5(eigen) if eigen else md5() - ptr = ptr or StreamPtr(stream) # Quick way to preserve stream integrity - with ptr.stream_jump_to(stream) as handle: - for buffer in iter_read(handle, buffer_size): - hasher.update(buffer) - return bytes.fromhex(hasher.hexdigest()) - - -def validate_md5_checksum(stream: BinaryIO, ptr: WindowPtr, eigen: bytes, expected: bytes, buffer_size: int = KiB * 64, _assert: bool = True) -> bool: - result = gen_md5_checksum(stream, eigen, buffer_size, ptr=ptr) - if _assert: - assert expected == result, (expected, result) - return True - else: - return expected == result diff --git a/src/relic/sga_old/common.py b/src/relic/sga_old/common.py deleted file mode 100644 index 21a0251..0000000 --- a/src/relic/sga_old/common.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import Optional, Iterator, BinaryIO, Union - -from serialization_tools.magic import MagicWordIO -from serialization_tools.structx import Struct - -from relic.common import VersionEnum, Version, VersionLike -from relic.sga_old.protocols import ArchiveWalk, FileCollection, FolderCollection, DriveCollection, Folder, VirtualDrive - -ArchiveVersionLayout = Struct("< 2H") - - - -class ArchiveVersion(Version): - LAYOUT = Version._16 - - -# class ArchiveVersion(VersionEnum): -# Unsupported = None -# v2 = Version(2) -# Dow = v2 -# v5 = Version(5) -# Dow2 = v5 -# v7 = Version(7) -# CoH2 = v7 -# v9 = Version(9) -# Dow3 = v9 -# -# @classmethod -# def unpack_version(cls, stream: BinaryIO) -> Version: -# return 
Version(*ArchiveVersionLayout.unpack_stream(stream)) -# -# @classmethod -# def pack_version(cls, stream: BinaryIO, version: VersionLike) -> int: -# if isinstance(version, VersionEnum): -# version = version.value -# return ArchiveVersionLayout.pack_stream(stream, version.major, version.minor) -# -# @classmethod -# def unpack(cls, stream: BinaryIO) -> ArchiveVersion: -# return ArchiveVersion(cls.unpack_version(stream)) -# -# def pack(self, stream: BinaryIO) -> int: -# return self.pack_version(stream, self) - - -@dataclass -class ArchiveRange: - start: int - end: int - __iterable: Optional[Iterator] = None - - @property - def size(self) -> int: - return self.end - self.start - - # We don't use iterable to avoid x - def __iter__(self) -> ArchiveRange: - self.__iterable = iter(range(self.start, self.end)) - return self - - def __next__(self) -> int: - return next(self.__iterable) - - -ArchiveMagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) - - -def walk(collection: Union[DriveCollection, FolderCollection, FileCollection]) -> ArchiveWalk: - raise TypeError("Use walk() function on collection!") - # drives = collection.drives if isinstance(collection, DriveCollection) else [] - # sub_folders = collection.sub_folders if isinstance(collection, FolderCollection) else [] - # files = collection.files if isinstance(collection, FileCollection) and not isinstance(collection, VirtualDrive) else [] - # - # root_drive = collection if isinstance(collection, VirtualDrive) else None - # root_folder = collection if isinstance(collection, Folder) else None - # - # # TODO optimize - # # logically, we can only walk folder OR drive - # if root_drive is None and root_folder is None and len(sub_folders) == 0 and len(files) == 0: - # # I don't think we need to return ANYTHING if we won't be iterating over it - # pass - # # if len(drives) == 0: # We will only yield this item, so we return this to always iterate over something - # # yield root_drive, root_folder, sub_folders, files - # else: - # yield root_drive, root_folder, sub_folders, files # at least one of these isn't None/Empty so we yield iti - # - # for drive in drives: - # for d, f, folds, files, in walk(drive): - # d = d or drive or root_drive - # f = f or root_folder - # yield d, f, folds, files - # - # for folder in sub_folders: - # for d, f, folds, files in walk(folder): - # d = d or root_drive - # f = f or folder or root_folder - # yield d, f, folds, files diff --git a/src/relic/sga_old/io.py b/src/relic/sga_old/io.py deleted file mode 100644 index f997574..0000000 --- a/src/relic/sga_old/io.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import annotations - -from typing import Dict, Type, BinaryIO - -from relic.common import VersionLike -from relic.sga_old.vX import APIvX -from relic.sga_old.common import ArchiveMagicWord, ArchiveVersion -from relic.sga_old.protocols import ArchiveHeader, Archive -from relic import sga_old - - -def unpack_archive_header(versions: Dict[VersionLike, Type[ArchiveHeader]], stream: BinaryIO, read_magic: bool = True) -> ArchiveHeader: - if read_magic: - ArchiveMagicWord.assert_magic_word(stream, True) - - version = ArchiveVersion.unpack_version(stream) - try: - header_class = versions[version] - except KeyError as e: - raise NotImplementedError(version) from e - - return header_class.unpack(stream) - - -def pack_archive_header(header: ArchiveHeader, stream: BinaryIO, write_magic: bool = True) -> int: - written = 0 - - if write_magic: - written += ArchiveMagicWord.write_magic_word(stream) - - written += 
ArchiveVersion.pack_version(stream, header.version) - written += header.pack(stream) - return written - - -def pack_archive(archive: Archive, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -def unpack_archive(stream: BinaryIO, sparse: bool = True, versions: Dict[VersionLike, APIvX] = None, *, validate: bool = True) -> Archive: - versions = sga_old.APIS if versions is None else versions - ArchiveMagicWord.assert_magic_word(stream, True) - version = ArchiveVersion.unpack_version(stream) - api = versions[version] - header = api.ArchiveHeader.unpack(stream) - if validate: - header.validate_checksums(stream) - return api.Archive.unpack(stream, header, sparse) # Defer to subclass (ensures packing works as expected) - - diff --git a/src/relic/sga_old/protocols.py b/src/relic/sga_old/protocols.py deleted file mode 100644 index 30fa1fd..0000000 --- a/src/relic/sga_old/protocols.py +++ /dev/null @@ -1,129 +0,0 @@ -from __future__ import annotations - -from pathlib import PurePath, PurePosixPath -from typing import BinaryIO, Protocol, runtime_checkable, List, Optional, Iterable, Tuple - -from serialization_tools.ioutil import Ptr, WindowPtr - -from relic.common import VersionLike - - -@runtime_checkable -class ArchiveHeader(Protocol): - name: str - toc_ptr: Ptr - data_ptr: WindowPtr - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool: - """ - Validates header checksums against the content's of the stream. - - The stream should return to its original position when it was passed in. - - :param stream: The binary stream to read from - :param fast: When true, slow checksums may be skipped - :param _assert: When true, an assertion is raised instead of returning False - :returns: True if all checksums match (or the type does not have checksums to validate) - :raises AssertionError: if a checksum does not match and _assert is True - """ - raise NotImplementedError - - # @classmethod - # @property - # def version(self) -> VersionLike: - # raise NotImplementedError - - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - raise NotImplementedError - - def pack(self, stream: BinaryIO) -> int: - raise NotImplementedError - - -@runtime_checkable -class ArchiveWalkable(Protocol): - def walk(self) -> ArchiveWalk: - raise NotImplementedError - - -@runtime_checkable -class DriveCollection(Protocol): - drives: List[VirtualDrive] - - -@runtime_checkable -class FolderCollection(Protocol): - sub_folders: List[Folder] - - -@runtime_checkable -class FileCollection(Protocol): - files: List[File] - - -@runtime_checkable -class FolderChild(Protocol): - parent_folder: Optional[Folder] - - -@runtime_checkable -class DriveChild(Protocol): - parent_drive: Optional[VirtualDrive] - - -@runtime_checkable -class VirtualDrive(FolderCollection, FileCollection, ArchiveWalkable, Protocol): - ... - - -@runtime_checkable -class Folder(FolderCollection, FileCollection, FolderChild, DriveChild, ArchiveWalkable, Protocol): - ... - - -@runtime_checkable -class File(FolderChild, DriveChild, Protocol): - ... 
- - @property - def full_path(self) -> PurePosixPath: - raise NotImplementedError - - def read_data(self, data_stream, param): - raise NotImplementedError - - -@runtime_checkable -class Archive(DriveCollection, ArchiveWalkable, Protocol): - header: ArchiveHeader - """Sparse represents whether data was loaded on creation.""" - _sparse: bool - - def walk(self) -> ArchiveWalk: - raise NotImplementedError - # return walk(self) - - @classmethod - def unpack(cls, stream: BinaryIO, header: ArchiveHeader, sparse: bool = True): - raise NotImplementedError - # version = header.version - # with header.toc_ptr.stream_jump_to(stream) as handle: - # toc_ptr = ArchiveTableOfContentsPtrABC.unpack_version(handle, version) - # toc_headers = ArchiveTableOfContentsHeadersABC.unpack(handle, toc_ptr, version) - # toc = ArchiveTableOfContentsABC.create(toc_headers) - # - # toc.load_toc() - # toc.build_tree() # ensures walk is unique; avoiding dupes and speeding things up - # if not sparse: - # with header.data_ptr.stream_jump_to(stream) as handle: - # toc.load_data(handle) - - # return cls(header, toc.drives, sparse) - - def pack(self, stream: BinaryIO) -> int: - raise NotImplementedError - - -ArchiveWalk = Iterable[Tuple[Optional[VirtualDrive], Optional[Folder], Iterable[Folder], Iterable[File]]] -ArchivePath = PurePath diff --git a/src/relic/sga_old/v2.py b/src/relic/sga_old/v2.py deleted file mode 100644 index 8ad6504..0000000 --- a/src/relic/sga_old/v2.py +++ /dev/null @@ -1,151 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import BinaryIO, Tuple, Type, ClassVar - -from serialization_tools.ioutil import WindowPtr, Ptr -from serialization_tools.structx import Struct - -from relic.common import VersionLike -from relic.sga_old import abc_old_ -from relic.sga_old.abc_old_ import ArchiveHeaderABC, ArchiveABC, FileHeaderABC, FolderHeaderABC, VirtualDriveHeaderABC, ArchiveToCPtrABC, ArchiveTableOfContentsHeadersABC -from relic.sga_old.checksums import validate_md5_checksum -from relic.sga_old.common import ArchiveVersion -from relic.sga_old.vX import APIvX - -version = None # ArchiveVersion.v2 - - -class _V2: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar - - -@dataclass -class ArchiveToCPtr(ArchiveToCPtrABC, _V2): - LAYOUT = Struct("< LH LH LH LH") - - -@dataclass -class ArchiveHeader(ArchiveHeaderABC, _V2): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET - LAYOUT = Struct(f"< 16s 128s 16s 2L") - # The eigen value is a guid? 
also knew that layout looked familiar - MD5_EIGENVALUES = (b"E01519D6-2DB7-4640-AF54-0A23319C56C3", b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF") - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - # @classmethod - # @property - # def version(cls) -> VersionLike: - # return cls.v - - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - csum_a, name, csum_b, toc_size, data_offset = cls.LAYOUT.unpack_stream(stream) - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=stream.tell(), size=toc_size) - data_ptr = WindowPtr(offset=data_offset, size=None) - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b)) - - def pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset - return self.LAYOUT.pack_stream(stream, *args) - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - - -class FileCompressionFlag(Enum): - # Compression flag is either 0 (Decompressed) or 16/32 which are both compressed - # Aside from 0; these appear to be the Window-Sizes for the Zlib Compression (In KibiBytes) - Decompressed = 0 - - Compressed16 = 16 - Compressed32 = 32 - - def compressed(self) -> bool: - return self != FileCompressionFlag.Decompressed - - -@dataclass -class FileHeader(FileHeaderABC, _V2): - # name - LAYOUT = Struct(f"<5L") - compression_flag: FileCompressionFlag - - def __eq__(self, other): - return self.compression_flag == other.compression_flag and super().__eq__(other) - - @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeader: - name_offset, compression_flag_value, data_offset, decompressed_size, compressed_size = cls.LAYOUT.unpack_stream(stream) - compression_flag = FileCompressionFlag(compression_flag_value) - name_ptr = Ptr(name_offset) - data_ptr = WindowPtr(data_offset, compressed_size) - return cls(name_ptr, data_ptr, decompressed_size, compressed_size, compression_flag) - - def pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.compression_flag.value, self.data_sub_ptr.offset, self.decompressed_size, self.compressed_size) - - @property - def compressed(self): - return self.compression_flag.compressed() - - -@dataclass -class FolderHeader(FolderHeaderABC, _V2): - LAYOUT = Struct("< L 4H") - - -@dataclass -class VirtualDriveHeader(VirtualDriveHeaderABC, _V2): - LAYOUT = Struct("< 64s 64s 4H 2s") - - -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - - -@dataclass(init=False) -class Archive(ArchiveABC, _V2): - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr - TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = 
ArchiveTableOfContentsHeaders - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -# Class Aliases; don't need to be inherited -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC - - -class APIv2(APIvX, _V2): - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - ArchiveHeader = ArchiveHeader - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive diff --git a/src/relic/sga_old/v5.py b/src/relic/sga_old/v5.py deleted file mode 100644 index 14e68e1..0000000 --- a/src/relic/sga_old/v5.py +++ /dev/null @@ -1,157 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import BinaryIO, Tuple, ClassVar, Type - -from serialization_tools.ioutil import Ptr, WindowPtr -from serialization_tools.structx import Struct - -from relic.common import VersionLike -from relic.sga_old import v2 -from relic.sga_old.abc_old_ import VirtualDriveHeaderABC, FolderHeaderABC, FileHeaderABC, ArchiveHeaderABC, ArchiveABC, ArchiveTableOfContentsHeadersABC -from relic.sga_old.checksums import validate_md5_checksum -from relic.sga_old.common import ArchiveVersion -from relic.sga_old.v2 import ArchiveToCPtrABC -from relic.sga_old import abc_old_ -from relic.sga_old.vX import APIvX - -version = None # ArchiveVersion.v5 - - -class _V5: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar # THIS IS A COPY; NOT A REFERENCE! - - -@dataclass -class VirtualDriveHeader(VirtualDriveHeaderABC, _V5): - LAYOUT = Struct("< 64s 64s 4H 2s") - - -@dataclass -class ArchiveToCPtr(ArchiveToCPtrABC, _V5): - LAYOUT = v2.ArchiveToCPtr.LAYOUT - - -@dataclass -class FolderHeader(FolderHeaderABC, _V5): - LAYOUT = Struct("< L 4H") - - -@dataclass -class FileHeader(FileHeaderABC, _V5): - LAYOUT = Struct(f"<5L H") - unk_a: int - unk_b: int - - @property - def compressed(self): - return self.compressed_size < self.decompressed_size - - @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeader: - name_off, data_off, comp_size, decomp_size, unk_a, unk_b = cls.LAYOUT.unpack_stream(stream) - # Name, File, Compressed, Decompressed, ???, ??? - name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b) - - def pack(self, stream: BinaryIO) -> int: - return self.LAYOUT.pack_stream(stream, self.name_sub_ptr.offset, self.data_sub_ptr.offset, self.compressed_size, self.decompressed_size, self.unk_a, self.unk_b) - - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and super().__eq__(other) - - -@dataclass -class ArchiveHeader(ArchiveHeaderABC, _V5): - # hash, name, hash (repeated), TOC_SIZE, DATA_OFFSET, TOC_POS, RESERVED:1, RESERVED:0?, UNK??? - LAYOUT = Struct(f"< 16s 128s 16s 3L 3L") - # Copied from DowI, may be different; praying it isn't - # UGH THIER DIFFERENT! 
Or the way to calculate them is different - # First, let's try no eigen # (None, None) # HAH TROLLED MYSELF, forgot to conert checksum to hex - MD5_EIGENVALUES = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - toc_ptr: WindowPtr - checksums: Tuple[bytes, bytes] - unk: int - - # This may not mirror DowI one-to-one, until it's verified, it stays here - # noinspection DuplicatedCode - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True): - # return True - ptrs = [Ptr(self.toc_ptr.offset), self.toc_ptr] - valid = True - indexes = (1,) if fast else (0, 1) - for i in indexes: - valid &= validate_md5_checksum(stream, ptrs[i], self.MD5_EIGENVALUES[i], self.checksums[i], _assert=_assert) - return valid - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version and self.checksums[0] == other.checksums[0] and self.checksums[1] == other.checksums[1] - # - # @property - # def version(self) -> VersionLike: - # return ArchiveVersion.Dow2 - - @classmethod - def unpack(cls, stream: BinaryIO) -> 'ArchiveHeader': - csum_a, name, csum_b, toc_size, data_offset, toc_pos, rsv_1, rsv_0, unk = cls.LAYOUT.unpack_stream(stream) - - assert rsv_1 == 1 - assert rsv_0 == 0 - - name = name.decode("utf-16-le").rstrip("\0") - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_offset) - - return cls(name, toc_ptr, data_ptr, (csum_a, csum_b), unk) - - def pack(self, stream: BinaryIO) -> int: - args = self.checksums[0], self.name.encode("utf-16-le"), self.checksums[1], self.toc_ptr.size, self.data_ptr.offset, self.toc_ptr.offset, 1, 0, self.unk - return self.LAYOUT.pack_stream(stream, *args) - - -# noinspection DuplicatedCode -# Code is identical; but meaning is completely different; using _V5 instead of _V2 -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - - -@dataclass(init=False) -class Archive(ArchiveABC, _V5): - TOC_PTR_CLS: ClassVar[Type[ArchiveToCPtrABC]] = ArchiveToCPtr - TOC_HEADERS_CLS: ClassVar[Type[ArchiveTableOfContentsHeadersABC]] = ArchiveTableOfContentsHeaders - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC - - -# noinspection DuplicatedCode -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - - -class APIv5(APIvX, _V5): - ArchiveHeader = ArchiveHeader - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive diff --git a/src/relic/sga_old/v9.py b/src/relic/sga_old/v9.py deleted file mode 100644 index dab5d45..0000000 --- a/src/relic/sga_old/v9.py +++ /dev/null @@ -1,167 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from 
typing import BinaryIO, Dict - -from serialization_tools.ioutil import Ptr, WindowPtr -from serialization_tools.structx import Struct - -from relic.common import VersionLike -from relic.sga_old import abc_old_ -from relic.sga_old.abc_old_ import VirtualDriveHeaderABC, ArchiveToCPtrABC, FolderHeaderABC, FileHeaderABC, ArchiveABC, ArchiveHeaderABC, ArchiveTableOfContentsHeadersABC, NameBufferABC -from relic.sga_old.common import ArchiveVersion -from relic.sga_old.protocols import ArchiveHeader -from relic.sga_old.vX import APIvX - -version =None # ArchiveVersion.v9 - - -class _V9: - """Mixin to allow classes to add `version` from the module level to the class level""" - version = version # classvar = modulevar - - -@dataclass -class VirtualDriveHeader(VirtualDriveHeaderABC, _V9): - LAYOUT = Struct("< 64s 64s 4L 4s") - - -@dataclass -class ArchiveToCPtr(ArchiveToCPtrABC, _V9): - LAYOUT = Struct("< 8L") - - -@dataclass -class FolderHeader(FolderHeaderABC, _V9): - LAYOUT = Struct("< 5L") - - -@dataclass -class FileHeader(FileHeaderABC, _V9): - LAYOUT = Struct("< 7L H L") - unk_a: int - unk_b: int - unk_c: int - unk_d: int # 256? - unk_e: int - - def __eq__(self, other): - return self.unk_a == other.unk_a and self.unk_b == other.unk_b and self.unk_c == other.unk_c and self.unk_d == other.unk_d and self.unk_e == other.unk_e and super().__eq__(other) - - @classmethod - def unpack(cls, stream: BinaryIO) -> FileHeader: - name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e = cls.LAYOUT.unpack_stream(stream) - # assert unk_a == 0, (unk_a, 0) - # assert unk_b == 0, (unk_b, 0) - # UNK_D is a new compression flag?! - # if comp_size != decomp_size: - # assert unk_d in [256,512], ((comp_size, decomp_size), (unk_d, [256,512]), (name_off, unk_a, data_off, unk_b, comp_size, decomp_size, unk_c, unk_d, unk_e)) - # Pulling stuff out of my ass; but dividing them by the max block size gets you 7, 6 respectively - # Name, File, Compressed, Decompressed, ???, ??? - name_ptr = Ptr(name_off) - data_ptr = Ptr(data_off) - return cls(name_ptr, data_ptr, decomp_size, comp_size, unk_a, unk_b, unk_c, unk_d, unk_e) - - def pack(self, stream: BinaryIO) -> int: - args = self.name_sub_ptr.offset, self.unk_a, self.data_sub_ptr.offset, self.unk_b, self.compressed_size, self.decompressed_size, self.unk_c, self.unk_d, self.unk_e - return self.LAYOUT.pack_stream(stream, *args) - - @property - def compressed(self): - return self.compressed_size < self.decompressed_size - - -@dataclass -class ArchiveHeader(ArchiveHeaderABC, _V9): - # name, TOC_POS, TOC_SIZE, DATA_POS, DATA_SIZE, RESERVED:0?, RESERVED:1, RESERVED:0?, UNK??? - - LAYOUT = Struct(f"<128s QL QQ L 256s") - toc_ptr: WindowPtr - data_ptr: WindowPtr - - unk: bytes - - def validate_checksums(self, stream: BinaryIO, *, fast: bool = True, _assert: bool = True) -> bool: - """ - Dawn of War III does not contain any checksums, and so will always return true. 
- - :param stream: Ignored - :param fast: Ignored - :param _assert: Ignored - :returns: True - """ - return True - - # @property - # def version(self) -> VersionLike: - # return ArchiveVersion.Dow3 - - @classmethod - def unpack(cls, stream: BinaryIO) -> ArchiveHeader: - name, toc_pos, toc_size, data_pos, data_size, rsv_1, unk = cls.LAYOUT.unpack_stream(stream) - - assert rsv_1 == 1 - toc_ptr = WindowPtr(offset=toc_pos, size=toc_size) - data_ptr = WindowPtr(offset=data_pos, size=data_size) - name = name.decode("utf-16-le").rstrip("\0") - - return cls(name, toc_ptr, data_ptr, unk) - - def pack(self, stream: BinaryIO) -> int: - args = self.name.encode("utf-16-le"), self.toc_ptr.offset, self.toc_ptr.size, self.data_ptr.offset, self.data_ptr.size, 1, self.unk - return self.LAYOUT.pack_stream(stream, *args) - - def __eq__(self, other): - # TODO make issue to add equality to WindowPtr/Ptr - return self.name == other.name and self.unk == other.unk \ - and self.toc_ptr.size == other.toc_ptr.size and self.toc_ptr.offset == other.toc_ptr.offset \ - and self.data_ptr.size == other.data_ptr.size and self.data_ptr.offset == other.data_ptr.offset \ - and self.version == other.version - - -File = abc_.FileABC -Folder = abc_.FolderABC -VirtualDrive = abc_.VirtualDriveABC - - -class NameBuffer(NameBufferABC): - @classmethod - def unpack(cls, stream: BinaryIO, buffer_size: int) -> Dict[int, str]: - """ Dow III uses a 'buffer size' instead of a 'name count' to unpack names """ - buffer = stream.read(buffer_size) - parts = buffer.split(b"\0") - lookup = {} - offset = 0 - for name in parts: - lookup[offset] = name.decode("ascii") - offset += len(name) + 1 # +1 to account for b'\0' - return lookup - - -class ArchiveTableOfContentsHeaders(ArchiveTableOfContentsHeadersABC): - VDRIVE_HEADER_CLS = VirtualDriveHeader - FOLDER_HEADER_CLS = FolderHeader - FILE_HEADER_CLS = FileHeader - NAME_BUFFER_CLS = NameBuffer - - -@dataclass(init=False) -class Archive(ArchiveABC, _V9): - TOC_PTR_CLS = ArchiveToCPtr - TOC_HEADERS_CLS = ArchiveTableOfContentsHeaders - - def pack(self, stream: BinaryIO, write_magic: bool = True) -> int: - raise NotImplementedError - - -class APIv9(APIvX, _V9): - ArchiveTableOfContentsHeaders = ArchiveTableOfContentsHeaders - ArchiveHeader = ArchiveHeader - FileHeader = FileHeader - FolderHeader = FolderHeader - VirtualDriveHeader = VirtualDriveHeader - Archive = Archive - ArchiveToCPtr = ArchiveToCPtr - File = File - Folder = Folder - VirtualDrive = VirtualDrive diff --git a/src/relic/sga_old/vX.py b/src/relic/sga_old/vX.py deleted file mode 100644 index 2edf2d9..0000000 --- a/src/relic/sga_old/vX.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import annotations - -from types import ModuleType -from typing import Type, Protocol, ClassVar - -from relic.sga_old import abc_old_, protocols -from relic.sga_old.common import ArchiveVersion - - -class APIvX(Protocol): - """ - Allows us to have a TYPED OBJECT with required types for each version - - """ - - version:ClassVar[ArchiveVersion] - # Archive - Archive: Type[protocols.Archive] - ArchiveHeader: Type[protocols.ArchiveHeader] - # Table Of Contents - ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC] - ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC] - # Files - FileHeader: Type[abc_.FileHeaderABC] - File: Type[protocols.File] - # Folders - FolderHeader: Type[abc_.FolderHeaderABC] - Folder: Type[protocols.Folder] - # VDrive - VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC] - VirtualDrive: Type[protocols.VirtualDrive] - - 
-"""Modules implementing vX should define all of the following attributes""" -required_attrs = APIvX.__annotations__.keys() - - -def is_module_api(module: ModuleType): - return all(hasattr(module, attr) for attr in required_attrs) diff --git a/src/relic/sga_old/writer.py b/src/relic/sga_old/writer.py deleted file mode 100644 index 44120fb..0000000 --- a/src/relic/sga_old/writer.py +++ /dev/null @@ -1,297 +0,0 @@ -# TODO Dig through this and see how much can be moved to TOC and if any of it must be a separate file - -# # Cycles aren't supported (and will crash) -# # Multiple parents will be copied -# -# -# def flatten_folders(collection: AbstractDirectory, flattened: List[Folder]) -> Tuple[int, int]: -# start = len(flattened) -# flattened.extend(collection.folders) -# stop = len(flattened) -# return start, stop -# -# -# def flatten_files(collection: AbstractDirectory, flattened: List[File]) -> Tuple[int, int]: -# start = len(flattened) -# flattened.extend(collection.files) -# stop = len(flattened) -# return start, stop -# -# -# # Offset, Count (Items), Size (Bytes) -# def write_virtual_drives(stream: BinaryIO, archive: Archive, version: Version, name_table: Dict[any, int], -# recalculate: bool = False) -> Tuple[int, int, int]: -# running_folder = 0 -# running_file = 0 -# written = 0 -# -# offset = stream.tell() -# for drive in archive.drives: -# folder_count = drive.folder_count(recalculate) -# file_count = drive.file_count(recalculate) -# -# folder = ArchiveRange(running_folder, running_folder + folder_count) -# files = ArchiveRange(running_file, running_file + file_count) -# -# running_folder += folder_count -# running_file += file_count -# -# header = VirtualDriveHeader(drive.path, drive.name, folder, files, folder.start) -# written += header.pack(stream, version) -# -# return offset, len(archive.drives), written -# -# -# def write_names(stream: BinaryIO, archive: Archive) -> Tuple[int, int, int, Dict[str, int]]: -# offset = stream.tell() -# running_total = 0 -# lookup = {} -# written = 0 -# -# def try_write_null_terminated(name: str) -> int: -# if name in lookup: -# return 0 -# # We must use relative offset to data_origin -# lookup[name] = stream.tell() - offset -# terminated_name = name -# if name[-1] != "\0": -# terminated_name += "\0" -# encoded = terminated_name.encode("ascii") -# return stream.write(encoded) -# -# # This will not re-use repeated names; we could change it, but I won't since my brain is over-optimizing this -# # By allowing names to repeat, we avoid perform hash checks in a dictionary (or equality comparisons in a list) -# for drive in archive.drives: -# for _, folders, files in drive.walk(): -# for f in folders: -# written += try_write_null_terminated(f.name) -# running_total += 1 -# for f in files: -# written += try_write_null_terminated(f.name) -# running_total += 1 -# -# return offset, running_total, written, lookup -# -# -# # Offset, Count (Items), Size (Bytes) -# def write_folders(stream: BinaryIO, archive: Archive, version: Version, name_lookup: Dict[str, int], -# recalculate: bool = False) -> Tuple[ -# int, int, int]: -# running_folder = 0 -# running_file = 0 -# written = 0 -# total_folders = 0 -# offset = stream.tell() -# for drive in archive.drives: -# for _, folders, _ in drive.walk(): -# for folder in folders: -# total_folders += 1 -# folder_count = folder.folder_count(recalculate) -# file_count = folder.file_count(recalculate) -# -# folder_range = ArchiveRange(running_folder, running_folder + folder_count) -# file_range = ArchiveRange(running_file, 
running_file + file_count) -# -# running_folder += folder_count -# running_file += file_count -# -# name_offset = name_lookup[folder.name] -# -# header = FolderHeader(name_offset, folder_range, file_range) -# written += header.pack(stream, version) -# -# return offset, total_folders, written -# -# -# def get_v2_compflag(comp_data: bytes, decomp_data: bytes): -# if len(comp_data) == len(decomp_data): -# return FileCompressionFlag.Decompressed -# flag = (comp_data[0] & 0xF0) >> 4 -# lookup = {7: FileCompressionFlag.Compressed32, 6: FileCompressionFlag.Compressed16} -# return lookup[flag] -# -# -# def get_v9_compflag(comp_data: bytes, decomp_data: bytes): -# if len(comp_data) == len(decomp_data): -# return 0 -# flag = (comp_data[0] & 0xF0) >> 4 -# lookup = {7: FileCompressionFlag.Compressed32, 6: FileCompressionFlag.Compressed16} -# return lookup[flag] -# -# -# # Lookup ~ Offset, Copmressed, Decompressed, Version Args -# # Offset, Count, Byte Size -# def write_file_data(stream: BinaryIO, archive: Archive, version: Version, auto_compress: bool = True) -> Tuple[ -# int, int, int, Dict[File, FileHeader]]: -# offset = stream.tell() -# -# KIBI = 1024 -# Kb16 = 16 * KIBI -# Kb32 = 32 * KIBI -# -# lookup = {} -# -# def write_info(compressed_data: bytes, decompressed_data: bytes) -> FileHeader: -# # We must use relative offset to data_origin -# data_offset = stream.tell() - offset -# -# if version == ArchiveVersion.Dow: -# compression_flag = get_v2_compflag(decompressed_data, decompressed_data) -# header = DowIFileHeader(None, data_offset, len(decompressed_data), len(compressed_data), compression_flag) -# elif version == ArchiveVersion.Dow2: -# header = DowIIFileHeader(None, data_offset, len(decompressed_data), len(compressed_data), 0, 0) -# elif version == ArchiveVersion.Dow3: -# # TODO rename unk_d to compression_flag -# compression_flag = get_v9_compflag(decompressed_data, decompressed_data) -# header = DowIIIFileHeader(None, data_offset, len(decompressed_data), len(compressed_data), 0, 0, 0, compression_flag, 0) -# else: -# raise NotImplementedError(version) -# stream.write(compressed_data) -# return header -# -# for drive in archive.drives: -# for _, _, files in drive.walk(): -# for file in files: -# comp_data = file.data -# decomp_data = file.get_decompressed() -# -# if not auto_compress: # Just dump it and GO! -# header = write_info(comp_data, decomp_data) -# else: -# # This is rather arbitrary, but these are my rules for auto-copmression: -# # Don't compress files that... -# # Are compressed (duh) -# # Are smaller than the largest (16-KibiBytes) compression window -# # When Compressing Files... 
-# # If the data size is less than 256 KibiBytes -# # Use 16-KbB Window -# # Otherwise -# # Use 32-KbB Window -# if len(comp_data) != len(decomp_data): # Compressed; just write as is -# header = write_info(comp_data, decomp_data) -# elif len(decomp_data) < Kb16: # Too small -# header = write_info(comp_data, decomp_data) -# else: -# if len(decomp_data) < KIBI: # Use Window 16KbB -# compressor = zlib.compressobj(wbits=14) -# else: # Use Window 32KbB -# compressor = zlib.compressobj(wbits=15) -# # Compress; because we are using compression obj, we need to use a temp -# with BytesIO() as temp: -# temp.write(compressor.compress(comp_data)) -# temp.write(compressor.flush()) -# temp.seek(0) -# comp_data = temp.read() -# header = write_info(comp_data, decomp_data) -# lookup[file] = header -# -# stop = stream.tell() -# size = stop - offset -# return offset, len(lookup), size, lookup -# -# -# def write_files(stream: BinaryIO, archive: Archive, version: Version, name_lookup: Dict[str, int], -# data_lookup: Dict[File, FileHeader]) -> Tuple[int, int, int]: -# offset = stream.tell() -# written = 0 -# file_count = 0 -# -# for drive in archive.drives: -# for _, _, files in drive.walk(): -# for file in files: -# header = data_lookup[file] -# header.name_subptr = name_lookup[file.name] -# written += header.pack_version(stream, version) -# file_count += 1 -# -# return offset, file_count, written -# -# -# def write_table_of_contents(stream: BinaryIO, archive: Archive, version: Version, -# data_lookup: Dict[File, FileHeader], recalculate_totals: bool = True) -> Tuple[int, int]: -# if recalculate_totals: -# for d in archive.drives: -# d.folder_count(True) -# d.file_count(True) -# -# toc_offset = stream.tell() -# toc_size = ArchiveToC.get_size(version) -# stream.write(bytes([0x00] * toc_size)) -# -# # Names needs to be computer first, but DOW's layout is Drives, Folders, Files, Names (not that it HAS to be) -# # I follow their pattern for consistency if nothing else -# # THIS ONLY WORKS BECAUSE OFFSETS ARE RELATIVE TO THE NAME OFFSET -# with BytesIO() as name_buffer: -# _, name_count, name_size, name_lookup = write_names(name_buffer, archive) -# -# vd_offset, vd_count, vd_size = write_virtual_drives(stream, archive, version, name_lookup) -# vd_part = OffsetInfo(toc_offset, vd_offset - toc_offset, vd_count) -# -# fold_offset, fold_count, fold_size = write_folders(stream, archive, version, name_lookup) -# fold_part = OffsetInfo(toc_offset, fold_offset - toc_offset, fold_count) -# -# file_offset, file_count, file_size = write_files(stream, archive, version, name_lookup, data_lookup) -# file_part = OffsetInfo(toc_offset, file_offset - toc_offset, file_count) -# -# name_offset = stream.tell() -# name_buffer.seek(0) -# stream.write(name_buffer.read()) -# name_part = FilenameOffsetInfo(toc_offset, name_offset - toc_offset, name_count, name_size) -# -# end = stream.tell() -# # Writeback proper TOC -# toc = ArchiveTableOfContents(vd_part, fold_part, file_part, name_part) -# stream.seek(toc_offset) -# toc.pack(stream, version) -# -# stream.seek(end) -# return toc_offset, end - toc_offset -# -# -# def write_archive(stream: BinaryIO, archive: Archive, auto_compress: bool = True, recalculate_totals: bool = True) -> int: -# version = archive.info.header.version -# -# if version not in [ArchiveVersion.Dow, ArchiveVersion.Dow2, ArchiveVersion.Dow3]: -# raise NotImplementedError(version) -# -# start = stream.tell() -# # PRIMARY HEADER -# archive.info.header.pack(stream) -# -# # SUB HEADER SETUP -# # We need to do a write-back 
once we know the offsets, sizes, what have you -# subheader_offset = stream.tell() -# -# subheader = ArchiveSubHeader.default(version) -# subheader.pack(stream, version) # Write filler data -# -# # TOC & DATA -# if version == ArchiveVersion.Dow: -# # Unfortunately, we depend on Data Buffer to write TOC, and TOC 'MUST' come immediately after the Sub Header in Sga-V2.0 -# # So we write data to a memory buffer before rewriting to a -# with BytesIO() as data_buffer: -# _, _, _, data_lookup = write_file_data(data_buffer, archive, version, auto_compress) -# toc_offset, toc_size = write_table_of_contents(stream, archive, version, data_lookup, recalculate_totals) -# data_offset = stream.tell() -# data_buffer.seek(0) -# stream.write(data_buffer.read()) -# subheader = ArchiveSubHeader(toc_size, data_offset, toc_offset) -# -# elif version in [ArchiveVersion.Dow2, ArchiveVersion.Dow3]: -# # Since these formats can point to TOC specifically, I write to the stream directly -# data_offset, _, data_size, data_lookup = write_file_data(stream, archive, version, auto_compress) -# toc_offset, toc_size = write_table_of_contents(stream, archive, version, data_lookup, recalculate_totals) -# if version == ArchiveVersion.Dow2: -# subheader = ArchiveSubHeader(toc_size, data_offset, toc_offset, 1, 0, 0) -# elif version == ArchiveVersion.Dow3: -# subheader = ArchiveSubHeader(toc_size, data_offset, toc_offset, None, None, None, 0, 0, 1, -# bytes([0x00] * 256), data_size) -# else: -# raise NotImplementedError(version) # In case I add to the list in the above if and forget to add it here -# -# end = stream.tell() -# stream.seek(subheader_offset) -# subheader.pack(stream, version) -# -# stream.seek(end) -# return end - start From 88f665a9738eeb13c3ef00cfcd9159d6143707ae Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sat, 11 Jun 2022 21:51:44 -0800 Subject: [PATCH 09/19] progress commit --- src/relic/chunky/chunk/header.py | 2 +- src/relic/chunky/chunky/header.py | 13 +- src/relic/sga/__init__.py | 17 + src/relic/sga/_abc.py | 176 ++++ src/relic/sga/_serializers.py | 187 ++++ src/relic/sga/apis.py | 19 + src/relic/sga/core.py | 871 +++++++++++------- src/relic/sga/ov2.py | 121 +++ src/relic/sga/protocols.py | 132 +++ src/relic/sga/v2.py | 124 --- src/relic/sga/v2/__init__.py | 22 + src/relic/sga/v2/_serializers.py | 93 ++ src/relic/sga/v2/core.py | 42 + src/relic/sga/v5.py | 287 +++--- src/relic/sga/v5/__init__.py | 22 + src/relic/sga/v5/_serializers.py | 97 ++ src/relic/sga/v5/core.py | 58 ++ src/relic/sga/v7.py | 237 ++--- src/relic/sga/v7/__init__.py | 22 + src/relic/sga/v7/_serializers.py | 94 ++ src/relic/sga/v7/core.py | 40 + src/relic/sga/v9.py | 221 ++--- src/relic/sga/vX.py | 73 +- src/relic/sga/writer_tester.py | 36 + src/scripts/universal/sga/common.py | 4 +- src/scripts/universal/sga/unpack.py | 64 +- tests/relic/sga/archive/test_archive.py | 36 +- .../relic/sga/archive/test_archive_header.py | 406 ++++---- tests/relic/sga/datagen.py | 2 +- tests/relic/sga/file/test_file_header.py | 134 +-- tests/relic/sga/test_sga.py | 2 +- tests/relic/sga/test_vX_interface.py | 7 +- 32 files changed, 2493 insertions(+), 1168 deletions(-) create mode 100644 src/relic/sga/__init__.py create mode 100644 src/relic/sga/_abc.py create mode 100644 src/relic/sga/_serializers.py create mode 100644 src/relic/sga/apis.py create mode 100644 src/relic/sga/ov2.py create mode 100644 src/relic/sga/protocols.py delete mode 100644 src/relic/sga/v2.py create mode 100644 src/relic/sga/v2/__init__.py create mode 100644 
src/relic/sga/v2/_serializers.py create mode 100644 src/relic/sga/v2/core.py create mode 100644 src/relic/sga/v5/__init__.py create mode 100644 src/relic/sga/v5/_serializers.py create mode 100644 src/relic/sga/v5/core.py create mode 100644 src/relic/sga/v7/__init__.py create mode 100644 src/relic/sga/v7/_serializers.py create mode 100644 src/relic/sga/v7/core.py create mode 100644 src/relic/sga/writer_tester.py diff --git a/src/relic/chunky/chunk/header.py b/src/relic/chunky/chunk/header.py index 03b97af..b513ba8 100644 --- a/src/relic/chunky/chunk/header.py +++ b/src/relic/chunky/chunk/header.py @@ -8,7 +8,7 @@ from serialization_tools.vstruct import VStruct from ..chunky.header import ChunkyVersion -from relic.sga.common import VersionLike +from relic.common import VersionLike from ...common import VersionError diff --git a/src/relic/chunky/chunky/header.py b/src/relic/chunky/chunky/header.py index 7fb5b62..bbc3255 100644 --- a/src/relic/chunky/chunky/header.py +++ b/src/relic/chunky/chunky/header.py @@ -1,24 +1,25 @@ from __future__ import annotations from dataclasses import dataclass +from enum import Enum from typing import BinaryIO, Dict, Type from serialization_tools.magic import MagicWordIO, MagicWord from serialization_tools.structx import Struct from relic.common import VersionError -from relic.sga.common import VersionEnum, Version, VersionLike - +# from relic.sga.common import VersionEnum, Version, VersionLike +VersionEnum = Version = VersionLike = None ChunkyVersionLayout = Struct("< 2L") -class ChunkyVersion(VersionEnum): +class ChunkyVersion(Enum): Unsupported = None - v0101 = Version(1, 1) + v0101 = None # Version(1, 1) Dow = v0101 # ALIAS for Prettiness - v0301 = Version(3, 1) + v0301 = None # Version(3, 1) Dow2 = v0301 # ALIAS for Prettiness - v4010 = Version(4, 1) + v4010 = None #Version(4, 1) @classmethod def unpack_version(cls, stream: BinaryIO) -> Version: diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py new file mode 100644 index 0000000..2423fda --- /dev/null +++ b/src/relic/sga/__init__.py @@ -0,0 +1,17 @@ +from typing import List, Dict + +from relic.sga import protocols, v2, v5, v7 +from relic.sga.core import Version + +_APIS: List[protocols.API] = [v2.API, v5.API, v7.API] +apis: Dict[Version, protocols.API] = {api.version: api for api in _APIS} + +__all__ = [ + "v2", + "v5", + "v7", + "v9", + "protocols", + "core", + "apis" +] diff --git a/src/relic/sga/_abc.py b/src/relic/sga/_abc.py new file mode 100644 index 0000000..52d28e3 --- /dev/null +++ b/src/relic/sga/_abc.py @@ -0,0 +1,176 @@ +from __future__ import annotations + +import zlib +from abc import ABC +from contextlib import contextmanager +from dataclasses import dataclass +from io import BytesIO +from pathlib import PurePath +from typing import List, Optional, Tuple, BinaryIO, Type, Generic + +from relic.sga import protocols as p +from relic.sga.protocols import TFileMetadata, StorageType, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer +from relic.sga.core import Version + + +def _build_io_path(name: str, parent: Optional[p.IONode]) -> PurePath: + if parent is not None and isinstance(parent, p.IOPathable): + return parent.path / name + else: + return PurePath(name) + + +@dataclass +class _FileLazyInfo: + jump_to: int + packed_size: int + unpacked_size: int + stream: BinaryIO + + def read(self, decompress: bool) -> bytes: + jump_back = self.stream.tell() + self.stream.seek(self.jump_to) + buffer = self.stream.read(self.packed_size) + if decompress and 
self.packed_size != self.unpacked_size: + buffer = zlib.decompress(buffer) + assert len(buffer) == self.unpacked_size # TODO Raise Exception instead + self.stream.seek(jump_back) + return buffer + + +@dataclass +class DriveDef: + alias: str + name: str + root_folder: int + folder_range: Tuple[int, int] + file_range: Tuple[int, int] + + +@dataclass +class FolderDef: + name_pos: int + folder_range: Tuple[int, int] + file_range: Tuple[int, int] + + +@dataclass +class FileDefABC: + name_pos: int + data_pos: int + length_on_disk: int + length_in_archive: int + storage_type: StorageType + + +@dataclass +class File(p.File[TFileMetadata]): + name: str + data: Optional[bytes] + storage_type: StorageType + metadata: Optional[TFileMetadata] = None + parent: Optional[IONode] = None + _lazy_info: Optional[_FileLazyInfo] = None + _is_compressed: bool = None + + @contextmanager + def open(self, read_only: bool = True) -> BinaryIO: + with BytesIO(self.data) as stream: + yield stream + if not read_only: + stream.seek(0) + self.data = stream.read() + + @property + def is_compressed(self) -> bool: + return self._is_compressed + + def compress(self) -> None: + if not self._is_compressed: + self.data = zlib.compress(self.data) + self._is_compressed = True + + def decompress(self) -> None: + if self._is_compressed: + self.data = zlib.decompress(self.data) + self._is_compressed = False + + @property + def path(self) -> PurePath: + return _build_io_path(self.name, self.parent) + + +@dataclass +class Folder(p.Folder): + name: str + sub_folders: List[Folder] + files: List[File] + parent: Optional[IONode] = None + + @property + def path(self) -> PurePath: + return _build_io_path(self.name, self.parent) + + def walk(self) -> IOWalk: + yield self, self.sub_folders, self.files + for folder in self.sub_folders: + for inner_walk in folder.walk(): + yield inner_walk + + +@dataclass +class Drive(p.Drive): + alias: str + name: str + sub_folders: List[Folder] + files: List[File] + parent: None = None + __ignore__ = ["parent"] + + @property + def path(self) -> PurePath: + return _build_io_path(f"{self.alias}:", None) + + def walk(self) -> IOWalk: + yield self, self.sub_folders, self.files + for folder in self.sub_folders: + for inner_walk in folder.walk(): + yield inner_walk + + +@dataclass +class Archive(Generic[TMetadata], p.Archive[TMetadata]): + name: str + metadata: TMetadata + drives: List[Drive] + + def walk(self) -> IOWalk: + for drive in self.drives: + for inner_walk in drive.walk(): + yield inner_walk + + +# for good typing; manually define dataclass attributes in construct +# it sucks, but good typing is better than no typing +class API(p.API, ABC): + def __init__(self, version: Version, archive: Type[TArchive], drive: Type[TDrive], folder: Type[TFolder], file: Type[TFile], serializer: APISerializer): + self.version = version + self.Archive = archive + self.Drive = drive + self.Folder = folder + self.File = file + self._serializer = serializer + + def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> TArchive: + return self._serializer.read(stream, lazy, decompress) + + def write(self, stream: BinaryIO, archive: TArchive) -> int: + return self._serializer.write(stream,archive) + + +class APISerializer(Generic[TArchive]): + def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> TArchive: + raise NotImplementedError + + def write(self, stream: BinaryIO, archive: TArchive) -> int: + raise NotImplementedError diff --git a/src/relic/sga/_serializers.py 
b/src/relic/sga/_serializers.py
new file mode 100644
index 0000000..d6e825a
--- /dev/null
+++ b/src/relic/sga/_serializers.py
@@ -0,0 +1,187 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import BinaryIO, List, Dict, Optional, Callable, Tuple, Iterable
+
+from serialization_tools.structx import Struct
+
+from relic.sga import _abc
+from relic.sga._abc import DriveDef, FolderDef, FileDefABC as FileDef, _FileLazyInfo, FileDefABC
+from relic.sga.protocols import TFileMetadata, IOContainer, StreamSerializer, T, TFile, TDrive
+
+
+@dataclass
+class TocHeader:
+    drive_info: Tuple[int, int]
+    folder_info: Tuple[int, int]
+    file_info: Tuple[int, int]
+    name_info: Tuple[int, int]
+
+
+class TocHeaderSerializer(StreamSerializer[TocHeader]):
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> TocHeader:
+        drive_pos, drive_count, folder_pos, folder_count, file_pos, file_count, name_pos, name_count = self.layout.unpack_stream(stream)
+        return TocHeader((drive_pos, drive_count), (folder_pos, folder_count), (file_pos, file_count), (name_pos, name_count))
+
+    def pack(self, stream: BinaryIO, value: TocHeader) -> int:
+        args = value.drive_info[0], value.drive_info[1], value.folder_info[0], value.folder_info[1], value.file_info[0], value.file_info[1], value.name_info[0], value.name_info[1]
+        return self.layout.pack_stream(stream, *args)
+
+
+class DriveDefSerializer(StreamSerializer[DriveDef]):
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> DriveDef:
+        alias: bytes
+        name: bytes
+        alias, name, folder_start, folder_end, file_start, file_end, root_folder = self.layout.unpack_stream(stream)
+        alias: str = alias.rstrip(b"\0").decode("ascii")
+        name: str = name.rstrip(b"\0").decode("ascii")
+        folder_range = (folder_start, folder_end)
+        file_range = (file_start, file_end)
+        return DriveDef(alias=alias, name=name, root_folder=root_folder, folder_range=folder_range, file_range=file_range)
+
+    def pack(self, stream: BinaryIO, value: DriveDef) -> int:
+        alias: bytes = value.alias.encode("ascii")
+        name: bytes = value.name.encode("ascii")
+        args = alias, name, value.folder_range[0], value.folder_range[1], value.file_range[0], value.file_range[1], value.root_folder
+        return self.layout.pack_stream(stream, *args)
+
+
+class FolderDefSerializer(StreamSerializer[FolderDef]):
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> FolderDef:
+        name_pos, folder_start, folder_end, file_start, file_end = self.layout.unpack_stream(stream)
+        folder_range = (folder_start, folder_end)
+        file_range = (file_start, file_end)
+        return FolderDef(name_pos=name_pos, folder_range=folder_range, file_range=file_range)
+
+    def pack(self, stream: BinaryIO, value: FolderDef) -> int:
+        args = value.name_pos, value.folder_range[0], value.folder_range[1], value.file_range[0], value.file_range[1]
+        return self.layout.pack_stream(stream, *args)
+
+
+def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderDef], file_defs: List[FileDef], names: Dict[int, str], data_pos: int, stream: BinaryIO, build_file_meta: Optional[Callable[[FileDef], TFileMetadata]] = None) -> Tuple[List[_abc.Drive], List[_abc.File]]:
+    all_files: List[TFile] = []
+    drives: List[TDrive] = []
+    for drive_def in drive_defs:
+        local_folder_defs = folder_defs[drive_def.folder_range[0]:drive_def.folder_range[1]]
+        local_file_defs = file_defs[drive_def.file_range[0]:drive_def.file_range[1]]
+
+        files: List[TFile] = []
+        for file_def in local_file_defs:
+            name = names[file_def.name_pos]
+            metadata = build_file_meta(file_def) if build_file_meta is not None else None
+            lazy_info = _FileLazyInfo(data_pos + file_def.data_pos, file_def.length_in_archive, file_def.length_on_disk, stream)
+            file = _abc.File(name, None, file_def.storage_type, metadata, None, lazy_info)
+            files.append(file)
+
+        folders: List[_abc.Folder] = []
+        for folder_def in local_folder_defs:
+            folder_name = names[folder_def.name_pos]
+            sub_files = files[folder_def.file_range[0]:folder_def.file_range[1]]
+            folder = _abc.Folder(folder_name, [], sub_files, None)
+            folders.append(folder)
+
+        for folder_def, folder in zip(local_folder_defs, folders):
+            folder.sub_folders = folders[folder_def.folder_range[0]:folder_def.folder_range[1]]
+
+        for folder in folders:
+            _apply_self_as_parent(folder)
+
+        drive_folder = folders[drive_def.root_folder]
+        drive = _abc.Drive(drive_def.alias, drive_def.name, drive_folder.sub_folders, drive_folder.files)
+        _apply_self_as_parent(drive)
+        all_files.extend(files)
+        drives.append(drive)
+    return drives, all_files
+
+
+def _apply_self_as_parent(collection: IOContainer):
+    for folder in collection.sub_folders:
+        folder.parent = collection
+    for file in collection.files:
+        file.parent = collection
+
+
+def _unpack_helper(stream: BinaryIO, toc_info: Tuple[int, int], header_pos: int, serializer: StreamSerializer[T]) -> List[T]:
+    stream.seek(header_pos + toc_info[0])
+    return [serializer.unpack(stream) for _ in range(toc_info[1])]
+
+
+def _read_toc_definitions(stream: BinaryIO, toc: TocHeader, header_pos: int, drive_serializer: StreamSerializer[DriveDef], folder_serializer: StreamSerializer[FolderDef], file_serializer: StreamSerializer[FileDefABC]):
+    drives = _unpack_helper(stream, toc.drive_info, header_pos, drive_serializer)
+    folders = _unpack_helper(stream, toc.folder_info, header_pos, folder_serializer)
+    files = _unpack_helper(stream, toc.file_info, header_pos, file_serializer)
+    return drives, folders, files
+
+
+def _read_toc_names_as_count(stream: BinaryIO, toc_info: Tuple[int, int], header_pos: int, buffer_size: int = 256) -> Dict[int, str]:
+    stream.seek(header_pos + toc_info[0])
+
+    names: Dict[int, str] = {}
+    running_buffer = bytearray()
+    offset = 0
+    while len(names) < toc_info[1]:
+        buffer = stream.read(buffer_size)
+        if len(buffer) == 0:
+            raise Exception("Ran out of data!")  # TODO, proper exception
+        terminal_null = buffer[-1:] == b"\0"
+        parts = buffer.split(b"\0")
+        if len(parts) > 1:
+            parts[0] = bytes(running_buffer) + parts[0]
+            running_buffer.clear()
+            if not terminal_null:
+                running_buffer.extend(parts[-1])
+            parts = parts[:-1]  # tail is either a partial name or the empty split after a terminal null
+        else:  # no null in this buffer; the whole chunk is a partial name
+            running_buffer.extend(parts[0])
+            continue
+
+        remaining = toc_info[1] - len(names)
+        available = min(len(parts), remaining)
+        for i in range(available):
+            name = parts[i]
+            names[offset] = name.decode("ascii")
+            offset += len(name) + 1
+    return names
+
+
+def _read_toc_names_as_size(stream: BinaryIO, toc_info: Tuple[int, int], header_pos: int) -> Dict[int, str]:
+    stream.seek(header_pos + toc_info[0])
+    name_buffer = stream.read(toc_info[1])
+    parts = name_buffer.split(b"\0")
+    if parts and parts[-1] == b"":  # drop the empty split after the final null terminator
+        parts = parts[:-1]
+    names: Dict[int, str] = {}
+    offset = 0
+    for part in parts:
+        names[offset] = part.decode("ascii")
+        offset += len(part) + 1
+    return names
+
+
+def _chunked_read(stream: BinaryIO, size: Optional[int] = None, chunk_size: Optional[int] = None) ->
Iterable[bytes]: + if size is None and chunk_size is None: + yield stream.read() + elif size is None and chunk_size is not None: + while True: + buffer = stream.read(chunk_size) + yield buffer + if len(buffer) != chunk_size: + break + elif size is not None and chunk_size is None: + yield stream.read(size) + else: + chunks = size // chunk_size + for _ in range(chunks): + yield stream.read(chunk_size) + total_read = chunk_size * chunks + if total_read < size: + yield stream.read(size - total_read) diff --git a/src/relic/sga/apis.py b/src/relic/sga/apis.py new file mode 100644 index 0000000..fbe1302 --- /dev/null +++ b/src/relic/sga/apis.py @@ -0,0 +1,19 @@ +# from typing import Dict, BinaryIO +# +# from relic.sga import ov2, v5, v7, v9, vX +# from relic.sga.core import Version, ArchiveABC, VersionNotSupportedError +# +# __APIS = [ov2.API, v5.API, v7.API, v9.API] +# APIS: Dict[Version, vX.APIvX] = {api.version: api for api in __APIS} +# +# +# def read_archive(stream: BinaryIO, sparse: bool = False, apis: Dict[Version, vX.APIvX] = None) -> ArchiveABC: +# apis = APIS if apis is None else apis +# ArchiveABC.MAGIC.read_magic_word(stream) +# version = Version.unpack(stream) +# try: +# api = apis[version] +# except KeyError: +# raise VersionNotSupportedError(version,list(apis.keys())) +# version.assert_version_matches(api.version) +# return api.Archive._read(stream, sparse) diff --git a/src/relic/sga/core.py b/src/relic/sga/core.py index 2d4e274..aa882c7 100644 --- a/src/relic/sga/core.py +++ b/src/relic/sga/core.py @@ -1,36 +1,57 @@ from __future__ import annotations import zlib +from abc import abstractmethod from dataclasses import dataclass -from datetime import datetime from enum import Enum -from pathlib import PosixPath, PurePosixPath -from typing import ClassVar, BinaryIO, Optional, List, Type, Dict, Tuple +from io import BytesIO +from pathlib import PurePath, PureWindowsPath +from typing import ClassVar, BinaryIO, Optional, List, Type, Dict, Tuple, Any, Protocol, Iterable, Union, Generic, TypeVar from serialization_tools.ioutil import WindowPtr from serialization_tools.magic import MagicWordIO from serialization_tools.structx import Struct MagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) - - -class FileVerificationType(Enum): - None_ = 0 # unknown real values, assuming incremental - CRC = 1 # unknown real values, assuming incremental - CRCBlocks = 2 # unknown real values, assuming incremental - MD5Blocks = 3 # unknown real values, assuming incremental - SHA1Blocks = 4 # unknown real values, assuming incremental - - -class FileStorageType(Enum): - Store = 0 - StreamCompress = 1 # 16 - BufferCompress = 2 # 32 +# +# T = TypeVar("T") +# class StreamSerializer(Generic[T], Protocol): +# def unpack(self, stream:BinaryIO) -> T: +# raise NotImplementedError +# def pack(self, stream:BinaryIO, value:T) -> int: +# raise NotImplementedError +# +# # Dont use dataclass +# class ArchivePathable(Protocol): +# _parent_path: Optional[ArchivePathable] +# +# @property +# def path(self) -> PurePath: +# raise NotImplementedError +# +# +# class ArchiveWalkable(Protocol): +# def walk(self) -> ArchiveWalk: +# raise NotImplementedError +# +# +# class FileVerificationType(Enum): +# None_ = 0 # unknown real values, assuming incremental +# CRC = 1 # unknown real values, assuming incremental +# CRCBlocks = 2 # unknown real values, assuming incremental +# MD5Blocks = 3 # unknown real values, assuming incremental +# SHA1Blocks = 4 # unknown real values, assuming incremental +# +# +# class 
FileStorageType(Enum): +# Store = 0 +# StreamCompress = 1 # 16 in v2 (old-engine binding) +# BufferCompress = 2 # 32 in v2 (old-engine binding) @dataclass class Version: - """ The Major Version; Relic revers to this as the 'Version' """ + """ The Major Version; Relic refers to this as the 'Version' """ major: int """ The Minor Version; Relic refers to this as the 'Product' """ minor: Optional[int] = 0 @@ -49,7 +70,7 @@ def __eq__(self, other): def __hash__(self): # Realistically; Version will always be <256 # But we could manually set it to something much bigger by accident; and that may cause collisions - return self.major << 32 + self.minor + return self.major << (self.LAYOUT.size // 2) + self.minor @classmethod def unpack(cls, stream: BinaryIO): @@ -80,310 +101,528 @@ def _print_mismatch(name: str, received, expected): return msg + "!" -class VersionMismatchError(Exception): - def __init__(self, version: Version = None, expected: Version = None): - self.version = version - self.expected = expected - - def __str__(self): - return _print_mismatch("Version", self.version, self.expected) - - -class Md5MismatchError(Exception): - def __init__(self, recieved: bytes = None, expected: bytes = None): - self.recieved = recieved +class MismatchError(Exception): + def __init__(self, name: str, received: Any = None, expected: Any = None): + self.name = name + self.received = received self.expected = expected def __str__(self): - return _print_mismatch("MD5", self.recieved, self.expected) - - -def _read_names_as_lookup(stream: BinaryIO, name_count_or_size: int, is_count: bool = True): - BUF_SIZE = 64 # stolen from archive reader - lookup = {} - offset = 0 - if not is_count: - buffer = stream.read(name_count_or_size) # size - names = [_.decode("ascii") for _ in buffer.split(b"\0")] - for name in names: - lookup[offset] = name - offset += len(name) + 1 - return lookup - else: - # THIS GETS COMPLICATED - start_pos = stream.tell() - current_name = b"" - # While we still need to reaad names - while len(lookup) < name_count_or_size: - # Read a partial buffer in - buffer = stream.read(BUF_SIZE) - if len(buffer) == 0: - raise Exception("Buffer ran out of data!") - # Try to do a fast separate on the null byte - enc_names = buffer.split(b"\0") - current_name += enc_names[0] - # Needs more data (no b"\0" was found) - if len(enc_names) == 1 and len(buffer) == BUF_SIZE: - continue - else: - # Handle [0] - lookup[offset] = current_name.decode("ascii") - offset += len(current_name) + 1 - current_name = b"" - # Handle [1,N] by seeking to offset and looping again - stream.seek(start_pos + offset) - continue - return lookup - - -@dataclass -class BlobPtrs: - header_pos: int - header_size: Optional[int] - data_pos: int - data_size: Optional[int] - - -@dataclass -class ToCPtrsABC: - vdrive_rel_pos: int - vdrive_count: int - folder_rel_pos: int - folder_count: int - file_rel_pos: int - file_count: int - name_rel_pos: int - name_count_or_size: int # meaning varies between version - - LAYOUT: ClassVar[Struct] - """ Only 'counts' are uint16s """ - LAYOUT_UINT16: ClassVar = Struct(" bytes: - if self.size_in_archive == 0: - return b"" - else: - with WindowPtr(self.abs_data_pos, self.size_in_archive).stream_jump_to(stream) as window: - file_data = window.read() - if self.storage_type == FileStorageType.Store: - return file_data - elif self.storage_type in [FileStorageType.StreamCompress, FileStorageType.BufferCompress]: - return zlib.decompress(file_data) - else: - raise NotImplementedError(f"Reading a file stored as 
`{self.storage_type}` is not supported!") - - -@dataclass -class FileMetaABC: - storage: FileStorageType - - -@dataclass -class FileABC: - name: str - meta: FileMetaABC - data: Optional[bytes] = None - sparse_info: Optional[FileSparseInfo] = None - - def read_data(self, stream: BinaryIO): - self.data = self.sparse_info.read(stream) - - -class ArchiveMetaABC: - ... # TODO - - -@dataclass -class ArchiveABC: - MAGIC: ClassVar = MagicWord - VERSION: ClassVar[Version] - name: str - meta: ArchiveMetaABC - drives: List[DriveABC] - - # header_size: int # Not required - # data_offset: int # Not required - - # header_offset: int - - TOC_PTRS: ClassVar[Type[ToCPtrsABC]] - VDRIVE_DEF: ClassVar[Type[DriveDefABC]] - FOLDER_DEF: ClassVar[Type[FolderDefABC]] - FILE_DEF: ClassVar[Type[FileDefABC]] - NAME_BUFFER_USES_COUNT: ClassVar[bool] = True - - @classmethod - def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: - raise NotImplementedError - - @classmethod - def _read_toc(cls, header_stream: BinaryIO, header_pos: int, toc_ptrs: ToCPtrsABC): - vdrive_stream = header_stream - vdrive_stream.seek(header_pos + toc_ptrs.vdrive_rel_pos) - vdrive_defs = [cls.VDRIVE_DEF.unpack(vdrive_stream) for _ in range(toc_ptrs.vdrive_count)] - - folders_stream = header_stream - folders_stream.seek(header_pos + toc_ptrs.folder_rel_pos) - folder_defs = [cls.FOLDER_DEF.unpack(folders_stream) for _ in range(toc_ptrs.folder_count)] - - files_stream = header_stream - files_stream.seek(header_pos + toc_ptrs.file_rel_pos) - file_defs = [cls.FILE_DEF.unpack(files_stream) for _ in range(toc_ptrs.file_count)] - - name_stream = header_stream - name_stream.seek(header_pos + toc_ptrs.name_rel_pos) - names = _read_names_as_lookup(name_stream, toc_ptrs.name_count_or_size, is_count=cls.NAME_BUFFER_USES_COUNT) - - return vdrive_defs, folder_defs, file_defs, names - - @classmethod - def _assemble_files(cls, file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): - raise NotImplementedError - - @classmethod - def _assemble_folders(cls, folder_defs: List[FolderDefABC], files: List[FileABC], names: Dict[int, str]): - folders: List[FolderABC] = [] - for f_def in folder_defs: - full_name = names[f_def.name_rel_pos] - if full_name != "": - name = str(PurePosixPath(full_name).parts[-1]) # hack to get last portion of pathed-name - else: - name = "" - folder = FolderABC(name, None, files[f_def.file_start:f_def.file_end + 1]) - folders.append(folder) - for f_def, folder in zip(folder_defs, folders): - folder.folders = folders[f_def.folder_start:f_def.folder_end + 1] - return folders - - @classmethod - def _assemble_drives(cls, drive_defs: List[DriveDefABC], folders: List[FolderABC]): - drives: List[DriveABC] = [] - for d_def in drive_defs: - folder = folders[d_def.folder_root] - drive = DriveABC(folder.folders, folder.files, d_def.alias, d_def.name) - drives.append(drive) - return drives - - @classmethod - def _assemble_hierarchy(cls, vdrive_defs: List[DriveDefABC], folder_defs: List[FolderDefABC], file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): - files = cls._assemble_files(file_defs, names, data_pos) - folders = cls._assemble_folders(folder_defs, files, names) - vdrives = cls._assemble_drives(vdrive_defs, folders) - return vdrives, folders, files - - @classmethod - def read(cls, stream: BinaryIO, sparse: bool = False): - cls.MAGIC.read_magic_word(stream) - archive_version = Version.unpack(stream) - archive_version.assert_version_matches(cls.VERSION) - name, meta, blob_ptrs, 
toc_ptrs = cls._unpack_meta(stream) - - # TOC Block - vdrive_defs, folder_defs, file_defs, names = cls._read_toc(stream, blob_ptrs.header_pos, toc_ptrs) - - vdrives, _, files = cls._assemble_hierarchy(vdrive_defs, folder_defs, file_defs, names, blob_ptrs.data_pos) - - if not sparse: - for file in files: - file.read_data(stream) - - return cls(name, meta, vdrives) - - # def walk(self) -> ArchiveWalk: - # for drive in self.drives: - # for _ in drive.walk(): - # yield _ + def __str__(self): + def str_ver(v: Version) -> str: # dont use str(version); too verbose + return f"{v.major}.{v.minor}" + + allowed_str = [str_ver(_) for _ in self.allowed] + return f"Version `{str_ver(self.received)}` is not supported. Versions supported: `{allowed_str}`" + + +# def _read_names_as_lookup(stream: BinaryIO, name_count_or_size: int, is_count: bool = True): +# BUF_SIZE = 64 # stolen from archive reader +# lookup = {} +# offset = 0 +# if not is_count: +# buffer = stream.read(name_count_or_size) # size +# names = [_.decode("ascii") for _ in buffer.split(b"\0")] +# for name in names: +# lookup[offset] = name +# offset += len(name) + 1 +# return lookup +# else: +# # THIS GETS COMPLICATED +# start_pos = stream.tell() +# current_name = b"" +# # While we still need to reaad names +# while len(lookup) < name_count_or_size: +# # Read a partial buffer in +# buffer = stream.read(BUF_SIZE) +# if len(buffer) == 0: +# raise Exception("Buffer ran out of data!") +# # Try to do a fast separate on the null byte +# enc_names = buffer.split(b"\0") +# current_name += enc_names[0] +# # Needs more data (no b"\0" was found) +# if len(enc_names) == 1 and len(buffer) == BUF_SIZE: +# continue +# else: +# # Handle [0] +# lookup[offset] = current_name.decode("ascii") +# offset += len(current_name) + 1 +# current_name = b"" +# # Handle [1,N] by seeking to offset and looping again +# stream.seek(start_pos + offset) +# continue +# return lookup +# +# +# @dataclass +# class BlobPtrs: +# header_pos: int +# header_size: Optional[int] +# data_pos: int +# data_size: Optional[int] +# +# +# @dataclass +# class ToCPtrsABC: +# vdrive_rel_pos: int +# vdrive_count: int +# folder_rel_pos: int +# folder_count: int +# file_rel_pos: int +# file_count: int +# name_rel_pos: int +# name_count_or_size: int # meaning varies between version +# +# LAYOUT: ClassVar[Struct] +# """ Only 'counts' are uint16s """ +# LAYOUT_UINT16: ClassVar = Struct(" PurePath: +# return PurePath(f"{self.alias}:/") +# +# def walk(self) -> ArchiveWalk: +# yield self, self.folders, self.files +# for folder in self.folders: +# for _, local_folder, sub_folders, files in folder.walk(): +# yield self, local_folder, sub_folders, files +# +# +# @dataclass +# class FolderDefABC: +# name_rel_pos: int +# folder_start: int +# folder_end: int +# file_start: int +# file_end: int +# +# LAYOUT: ClassVar[Struct] +# LAYOUT_UINT32: ClassVar = Struct(" PurePath: +# if self._parent_path: +# return self._parent_path.path / self.name +# else: +# return PurePath(self.name) +# +# def walk(self) -> ArchiveWalk: +# yield self, self.folders, self.files +# for folder in self.folders: +# for _ in folder.walk(): +# yield _ +# +# +# @dataclass +# class FileSparseInfo: +# storage_type: FileStorageType # Redundancy +# abs_data_pos: int # Absolute data position +# size_on_disk: int +# size_in_archive: int +# +# def read(self, stream: BinaryIO) -> bytes: +# if self.size_in_archive == 0: +# return b"" +# else: +# with WindowPtr(self.abs_data_pos, self.size_in_archive).stream_jump_to(stream) as window: +# file_data = 
window.read() +# if self.storage_type == FileStorageType.Store: +# return file_data +# elif self.storage_type in [FileStorageType.StreamCompress, FileStorageType.BufferCompress]: +# return zlib.decompress(file_data) +# else: +# raise NotImplementedError(f"Reading a file stored as `{self.storage_type}` is not supported!") +# +# +# @dataclass +# class FileMetaABC: +# storage: FileStorageType +# +# +# @dataclass +# class FileABC(ArchivePathable): +# name: str +# meta: FileMetaABC +# data: Optional[bytes] = None +# sparse_info: Optional[FileSparseInfo] = None +# _parent_path: Optional[ArchivePathable] = None +# +# @property +# def path(self) -> PurePath: +# if self._parent_path: +# return self._parent_path.path / self.name +# else: +# return PurePath(self.name) +# +# def read_data(self, stream: BinaryIO): +# self.data = self.sparse_info.read(stream) +# +# +# class ArchiveMetaABC: +# ... # TODO +# +# +# class ArchiveFlattener: +# # FILE_DEF_CLS: Type[FileDefABC] = FileDefABC +# FOLDER_DEF_CLS: Type[FolderDefABC] = FolderDefABC +# DRIVE_DEF_CLS: Type[DriveDefABC] = DriveDefABC +# +# def __init__(self, name_stream: BinaryIO, data_stream: BinaryIO, drive_def_cls: Optional[Type[DriveDefABC]] = None, folder_def_cls: Optional[Type[FolderDefABC]] = None): +# if drive_def_cls is not None: +# self.DRIVE_DEF_CLS = drive_def_cls +# if folder_def_cls is not None: +# self.FOLDER_DEF_CLS = folder_def_cls +# +# self.files: List[FileDefABC] = [] +# self.folders: List[FolderDefABC] = [] +# self.drives: List[DriveDefABC] = [] +# self.name_stream: BinaryIO = name_stream +# self._name_stream_offset: int = 0 +# self._data_stream_offset: int = 0 +# self.data_stream: BinaryIO = data_stream +# self._name_lookup: Dict[str, int] = {} +# +# def get_name_rel_pos(self, name: str) -> int: +# if name in self._name_lookup: +# return self._name_lookup[name] +# else: +# this_name_offset = self._name_lookup[name] = self._name_stream_offset +# self._name_stream_offset += self.name_stream.write(name.encode("ascii") + b"\0") +# return this_name_offset +# +# def get_name_rel_pos_from_path(self, pathable: ArchivePathable, root: DriveABC) -> int: +# path = pathable.path +# root_path = root.path +# rel_path = path.relative_to(root_path) +# name = str(rel_path) +# if name == "." 
and root_path == path: +# name = "" +# return self.get_name_rel_pos(name) +# +# @staticmethod +# def repackage_data(data: bytes, storage: FileStorageType) -> Tuple[bytes, int, int]: +# if storage == storage.Store: +# return data, len(data), len(data) +# else: +# comp_data = zlib.compress(data) +# return comp_data, len(data), len(comp_data) +# +# def get_data_rel_pos(self, data: bytes) -> int: +# offset = self._data_stream_offset +# self.data_stream.write(data) +# return offset +# +# @abstractmethod +# def build_file_def(self, file: FileABC, name_rel_pos: int, data_rel_pos: int, length: int, store_length: int, storage: FileStorageType) -> FileDefABC: +# raise NotImplementedError +# # return FileDefABC(name_rel_pos, data_rel_pos, length, store_length, storage) +# +# def flatten_file(self, file: FileABC): +# name_rel_pos = self.get_name_rel_pos(file.name) # files use name-only +# data_buffer, length, store_length = self.repackage_data(file.data, file.meta.storage) +# data_rel_pos = self.get_data_rel_pos(data_buffer) +# file_def = self.build_file_def(file, name_rel_pos, data_rel_pos, length, store_length, file.meta.storage) # FileDefABC(name_rel_pos,data_rel_pos,length,store_length,file.meta.storage) +# return file_def +# +# def flatten_folder(self, folder: FolderABC, root: DriveABC): +# folder_def_cls:Type[FolderDefABC] = self.FOLDER_DEF_CLS +# name_rel_pos = self.get_name_rel_pos_from_path(folder, root) +# folder_start = len(self.folders) +# file_start = len(self.files) +# folder_def = folder_def_cls(name_rel_pos, folder_start, folder_start + len(folder.folders), file_start, file_start + len(folder.files)) +# +# self.folders.extend([None] * len(folder.folders)) # Reserve space for sub-folders +# self.files.extend([None] * len(folder.files)) # Reserve space for subfiles +# +# for i, sub_folder in enumerate(folder.folders): +# self.folders[folder_start + i] = self.flatten_folder(sub_folder, root) +# for i, file in enumerate(folder.files): +# self.files[file_start + i] = self.flatten_file(file) +# return folder_def +# +# def flatten_drive(self, drive: DriveABC) -> DriveDefABC: +# drive_def_cls: Type[DriveDefABC] = self.DRIVE_DEF_CLS +# drive_folder_root = len(self.folders) +# drive_file_start = len(self.files) +# drive_def = drive_def_cls(drive.alias, drive.name, drive_folder_root, None, drive_file_start, None, drive_folder_root) +# +# self.folders.extend([None]) # Reserve space for root +# +# self.folders[drive_folder_root] = self.flatten_folder(drive, drive) # drive is technically a folder; but this should be fixed for better type-safety +# +# drive_def.folder_end = len(self.folders) +# drive_def.file_end = len(self.files) +# return drive_def +# +# def flatten_archive(self, archive: ArchiveABC): +# for drive in archive.drives: +# drive_def = self.flatten_drive(drive) +# self.drives.append(drive_def) +# +# @dataclass +# class ArchiveABC(ArchiveWalkable): +# MAGIC: ClassVar = MagicWord +# VERSION: ClassVar[Version] +# name: str +# meta: ArchiveMetaABC +# drives: List[DriveABC] +# +# # header_size: int # Not required +# # data_offset: int # Not required +# +# # header_offset: int +# +# TOC_PTRS: ClassVar[Type[ToCPtrsABC]] +# VDRIVE_DEF: ClassVar[Type[DriveDefABC]] +# FOLDER_DEF: ClassVar[Type[FolderDefABC]] +# FILE_DEF: ClassVar[Type[FileDefABC]] +# NAME_BUFFER_USES_COUNT: ClassVar[bool] = True +# +# @classmethod +# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: +# raise NotImplementedError +# +# def _pack_meta(self, stream:BinaryIO, ): +# raise 
NotImplementedError +# +# @classmethod +# def _read_toc(cls, header_stream: BinaryIO, header_pos: int, toc_ptrs: ToCPtrsABC): +# vdrive_stream = header_stream +# vdrive_stream.seek(header_pos + toc_ptrs.vdrive_rel_pos) +# vdrive_defs = [cls.VDRIVE_DEF.unpack(vdrive_stream) for _ in range(toc_ptrs.vdrive_count)] +# +# folders_stream = header_stream +# folders_stream.seek(header_pos + toc_ptrs.folder_rel_pos) +# folder_defs = [cls.FOLDER_DEF.unpack(folders_stream) for _ in range(toc_ptrs.folder_count)] +# +# files_stream = header_stream +# files_stream.seek(header_pos + toc_ptrs.file_rel_pos) +# file_defs = [cls.FILE_DEF.unpack(files_stream) for _ in range(toc_ptrs.file_count)] +# +# name_stream = header_stream +# name_stream.seek(header_pos + toc_ptrs.name_rel_pos) +# names = _read_names_as_lookup(name_stream, toc_ptrs.name_count_or_size, is_count=cls.NAME_BUFFER_USES_COUNT) +# +# return vdrive_defs, folder_defs, file_defs, names +# +# @classmethod +# def _write_toc(cls, header_stream:BinaryIO, drives:List[DriveDefABC], folders:List[FolderDefABC], files:List[FileDefABC], name_buffer:bytes, name_count_or_size:int) -> ToCPtrsABC: +# # The order shouldn't matter; but I follow the generally used format (that I've seen) of drive/folder/file/names +# drive_rel_pos, drive_count = header_stream.tell(), len(drives) +# for drive in drives: +# drive.pack(header_stream) +# +# folder_rel_pos, folder_count = header_stream.tell(), len(folders) +# for folder in folders: +# folder.pack(header_stream) +# +# file_rel_pos, file_count = header_stream.tell(), len(files) +# for file in files: +# file.pack(header_stream) +# +# name_rel_pos, name_count = header_stream.tell(), name_count_or_size +# header_stream.write(name_buffer) +# return cls.TOC_PTRS(drive_rel_pos,drive_count,folder_rel_pos,folder_count,file_rel_pos,file_count,name_rel_pos,name_count) +# +# @classmethod +# def _assemble_files(cls, file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): +# raise NotImplementedError +# +# @classmethod +# def _assemble_folders(cls, folder_defs: List[FolderDefABC], files: List[FileABC], names: Dict[int, str]): +# folders: List[FolderABC] = [] +# for f_def in folder_defs: +# full_name = names[f_def.name_rel_pos] +# if full_name != "": +# name = str(PureWindowsPath(full_name).parts[-1]) # hack to get last portion of pathed-name +# else: +# name = "" +# folder = FolderABC(name, None, files[f_def.file_start:f_def.file_end + 1], _flat_name=full_name) +# folders.append(folder) +# +# for file in folder.files: # Link files to parent +# file._parent_path = folder +# +# for f_def, folder in zip(folder_defs, folders): +# folder.folders = folders[f_def.folder_start:f_def.folder_end + 1] +# +# for subfolder in folder.folders: # Link folders to parent +# subfolder._parent_path = folder +# +# return folders +# +# @classmethod +# def _assemble_drives(cls, drive_defs: List[DriveDefABC], folders: List[FolderABC]): +# drives: List[DriveABC] = [] +# for d_def in drive_defs: +# folder = folders[d_def.folder_root] +# drive = DriveABC(folder.folders, folder.files, d_def.alias, d_def.name) +# drives.append(drive) +# +# # Relink folders/files to drive (instead of folder) +# for file in drive.files: +# file._parent_path = drive +# for folder in drive.folders: +# folder._parent_path = drive +# +# return drives +# +# @classmethod +# def _assemble_hierarchy(cls, vdrive_defs: List[DriveDefABC], folder_defs: List[FolderDefABC], file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): +# files = cls._assemble_files(file_defs, 
names, data_pos) +# folders = cls._assemble_folders(folder_defs, files, names) +# vdrives = cls._assemble_drives(vdrive_defs, folders) +# return vdrives, folders, files +# +# @classmethod +# def _read(cls, stream: BinaryIO, sparse: bool = False): +# name, meta, blob_ptrs, toc_ptrs = cls._unpack_meta(stream) +# +# # TOC Block +# vdrive_defs, folder_defs, file_defs, names = cls._read_toc(stream, blob_ptrs.header_pos, toc_ptrs) +# +# vdrives, _, files = cls._assemble_hierarchy(vdrive_defs, folder_defs, file_defs, names, blob_ptrs.data_pos) +# +# if not sparse: +# for file in files: +# file.read_data(stream) +# +# return cls(name, meta, vdrives) +# +# def _write_parts(self,out_stream:BinaryIO,): +# +# def _write(self, stream: BinaryIO) -> int: +# with BytesIO() as data_stream: +# with BytesIO() as name_stream: +# flattener = ArchiveFlattener(name_stream,data_stream,drive_def_cls=self.VDRIVE_DEF,folder_def_cls=self.FOLDER_DEF) +# flattener.flatten_archive(self) +# name_stream.seek(0) +# name_buffer = name_stream.read() +# with BytesIO() as header_stream: +# name_count_or_size = len(flattener._name_lookup) if self.NAME_BUFFER_USES_COUNT else len(name_buffer) +# toc = self._write_toc(header_stream,flattener.drives,flattener.folders,flattener.files,name_buffer,name_count_or_size) +# +# with BytesIO() as meta_stream: +# raise NotImplementedError +# +# @classmethod +# def read(cls, stream: BinaryIO, sparse: bool = False): +# magic: MagicWordIO = cls.MAGIC +# magic.read_magic_word(stream) +# archive_version = Version.unpack(stream) +# archive_version.assert_version_matches(cls.VERSION) +# return cls._read(stream, sparse) +# +# def write(self, stream: BinaryIO) -> int: +# magic: MagicWordIO = self.MAGIC +# version: Version = self.VERSION +# written = 0 +# written += magic.write_magic_word(stream) +# written += version.pack(stream) +# written += self._write(stream) +# return written +# +# def walk(self) -> ArchiveWalk: +# for drive in self.drives: +# for _ in drive.walk(): +# yield _ +# +# +# +# ArchiveWalk = Tuple[Union[DriveABC, FolderABC], Iterable[FolderABC], Iterable[FileABC]] diff --git a/src/relic/sga/ov2.py b/src/relic/sga/ov2.py new file mode 100644 index 0000000..5cfd42f --- /dev/null +++ b/src/relic/sga/ov2.py @@ -0,0 +1,121 @@ +# from __future__ import annotations +# +# import hashlib +# from dataclasses import dataclass +# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional +# +# from serialization_tools.size import KiB +# from serialization_tools.structx import Struct +# +# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, MD5MismatchError +# from relic.sga.vX import APIvX +# +# +# class _ToCPtrs(ToCPtrsABC): +# LAYOUT = ToCPtrsABC.LAYOUT_UINT16 +# +# +# class _DriveDef(DriveDefABC): +# LAYOUT = DriveDefABC.LAYOUT_UINT16 +# +# +# class _FolderDef(FolderDefABC): +# LAYOUT = FolderDefABC.LAYOUT_UINT16 +# +# +# version = Version(2) +# +# +# @dataclass +# class FileDef(FileDefABC): +# LAYOUT = Struct("<5I") +# +# @classmethod +# def unpack(cls, stream: BinaryIO): +# name_rel_pos, storage_type_val_v2, data_rel_pos, length, store_length = cls.LAYOUT.unpack_stream(stream) +# storage_type_map = {0: FileStorageType.Store, 16: FileStorageType.StreamCompress, 32: FileStorageType.BufferCompress} +# storage_type = storage_type_map[storage_type_val_v2] +# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type) +# +# +# 
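The commented-out FileDef.unpack above is where the v2 quirk lives: v2 archives store the storage type as the raw flags 0/16/32 rather than the enum's 0/1/2, so unpacking folds them onto the shared enum. A minimal round-trip sketch of that mapping, using the StorageType enum this patch adds to protocols.py (the helper names here are illustrative, not part of the patch):

from relic.sga.protocols import StorageType

# Raw v2 flag -> shared enum, as in the storage_type_map above
_V2_FLAG_TO_STORAGE = {0: StorageType.Store, 16: StorageType.StreamCompress, 32: StorageType.BufferCompress}
# Inverse mapping, for the (not yet implemented) writer path
_STORAGE_TO_V2_FLAG = {storage: flag for flag, storage in _V2_FLAG_TO_STORAGE.items()}

def unpack_v2_storage(flag: int) -> StorageType:
    return _V2_FLAG_TO_STORAGE[flag]

def pack_v2_storage(storage: StorageType) -> int:
    return _STORAGE_TO_V2_FLAG[storage]

assert pack_v2_storage(unpack_v2_storage(16)) == 16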
FileMeta = FileMetaABC +# File = FileABC +# Folder = FolderABC +# Drive = DriveABC +# +# +# @dataclass +# class ArchiveMeta(ArchiveMetaABC): +# file_md5: bytes +# header_md5: bytes +# blob_ptr: BlobPtrs # Cached for MD5 +# FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" +# HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" +# +# @staticmethod +# def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): +# _BUF_SIZE = 256 * KiB +# hasher = hashlib.md5(eigen) +# stream.seek(start) +# if size is None: +# while True: +# buffer = stream.read(_BUF_SIZE) +# hasher.update(buffer) +# if len(buffer) != _BUF_SIZE: +# break +# else: +# read = 0 +# while read < size: +# buffer = stream.read(min(_BUF_SIZE, size - read)) +# read += len(buffer) +# hasher.update(buffer) +# md5 = bytes.fromhex(hasher.hexdigest()) +# if md5 != expected: +# raise MD5MismatchError(md5, expected) +# +# def validate_file_md5(self, stream: BinaryIO): +# self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) +# +# def validate_header_md5(self, stream: BinaryIO): +# self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) +# +# +# class Archive(ArchiveABC): +# meta: ArchiveMeta +# # drives: List[Drive] # typing +# +# TOC_PTRS = _ToCPtrs +# VDRIVE_DEF = _DriveDef +# FOLDER_DEF = _FolderDef +# FILE_DEF = FileDef +# VERSION = version +# META_PREFIX_LAYOUT = Struct("<16s 128s 16s 2I") +# +# @classmethod +# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): +# files = [] +# for f_def in file_defs: +# meta = FileMeta(f_def.storage_type) +# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) +# file = File(names[f_def.name_rel_pos], meta, None, sparse) +# files.append(file) +# return files +# +# @classmethod +# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: +# encoded_name: bytes +# file_md5, encoded_name, header_md5, header_size, data_pos = cls.META_PREFIX_LAYOUT.unpack_stream(stream) +# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") +# header_pos = stream.tell() +# toc_ptrs = cls.TOC_PTRS.unpack(stream) +# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) +# meta = ArchiveMeta(file_md5, header_md5, blob_ptrs) +# return decoded_name, meta, blob_ptrs, toc_ptrs +# +# +# class API(APIvX): +# version = version +# Archive = Archive +# File = File +# Folder = Folder +# Drive = Drive diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py new file mode 100644 index 0000000..2ef0a8c --- /dev/null +++ b/src/relic/sga/protocols.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass +from enum import Enum +from pathlib import PurePath +from types import ModuleType +from typing import TypeVar, Protocol, List, Optional, ForwardRef, Tuple, Iterable, BinaryIO, Type, runtime_checkable + +from relic.common import Version + +FileFwd = ForwardRef("File") +FolderFwd = ForwardRef("Folder") +DriveFwd = ForwardRef("Drive") +ArchiveFwd = ForwardRef("Archive") +TFile = TypeVar("TFile", bound=FileFwd) +TFolder = TypeVar("TFolder", bound=FolderFwd) +TDrive = TypeVar("TDrive", bound=DriveFwd) +TArchive = TypeVar("TArchive", bound=ArchiveFwd) +TMetadata = TypeVar("TMetadata") +TFileMetadata = TypeVar("TFileMetadata") +T = 
TypeVar("T") + + +@runtime_checkable +class StreamSerializer(Protocol[T]): + def unpack(self, stream: BinaryIO) -> T: + raise NotImplementedError + + def pack(self, stream: BinaryIO, value: T) -> int: + raise NotImplementedError + + +class StorageType(int, Enum): + Store = 0 + BufferCompress = 1 + StreamCompress = 2 + + +class VerificationType(int, Enum): + None_ = 0 # unknown real values, assuming incremental + CRC = 1 # unknown real values, assuming incremental + CRCBlocks = 2 # unknown real values, assuming incremental + MD5Blocks = 3 # unknown real values, assuming incremental + SHA1Blocks = 4 # unknown real values, assuming incremental + + +@runtime_checkable +class IOPathable(Protocol): + @property + def path(self) -> PurePath: + raise NotImplementedError + + +class IONode(Protocol): + parent: Optional[IOContainer] + + +class IOContainer(IONode, Protocol): + sub_folders: List[Folder] + files: List[File] + + +IOWalkStep = Tuple[IOContainer, List[FolderFwd], List[FileFwd]] +IOWalk = Iterable[IOWalkStep] + + +class IOWalkable(Protocol[TFolder, TFile]): + def walk(self) -> IOWalk: + raise NotImplementedError + + +class File(IOPathable, IONode, Protocol[TFileMetadata]): + name: str + data: bytes + storage_type: StorageType + metadata: TFileMetadata + + @property + def is_compressed(self) -> bool: + raise NotImplementedError + + def compress(self) -> None: + raise NotImplementedError + + def decompress(self) -> None: + raise NotImplementedError + + @contextmanager + def open(self, read_only: bool = True) -> BinaryIO: + raise NotImplementedError + + +class Folder(IOWalkable, IOPathable, IOContainer, Protocol): + name: str + + +class Drive(IOWalkable, IOPathable, IOContainer, Protocol): + alias: str + name: str + + +class Archive(IOWalkable, Protocol[TMetadata]): + name: str + metadata: TMetadata + drives: List[Drive] + + +class API(Protocol[TArchive, TDrive, TFolder, TFile]): + version:Version + Archive: Type[TArchive] + Drive: Type[TDrive] + Folder: Type[TFolder] + File: Type[TFile] + + def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> TArchive: + raise NotImplementedError + + def write(self, stream: BinaryIO, archive: TArchive) -> int: + raise NotImplementedError + + +# Hard coded-ish but better then nothing +_required_api_attrs = API.__annotations__.keys() +_required_api_callables = ["read", "write"] + + +def is_module_api(module: ModuleType): + has_attr = all(hasattr(module, attr) for attr in _required_api_attrs) + funcs = dir(module) + has_callables = all(func in funcs for func in _required_api_callables) + return has_attr and has_callables diff --git a/src/relic/sga/v2.py b/src/relic/sga/v2.py deleted file mode 100644 index 1cce002..0000000 --- a/src/relic/sga/v2.py +++ /dev/null @@ -1,124 +0,0 @@ -from __future__ import annotations - -import hashlib -from dataclasses import dataclass -from datetime import datetime, timezone -from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional - -from serialization_tools.size import KiB -from serialization_tools.structx import Struct - -from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, Md5MismatchError - - -class _ToCPtrs(ToCPtrsABC): - LAYOUT = ToCPtrsABC.LAYOUT_UINT16 - - -class _DriveDef(DriveDefABC): - LAYOUT = DriveDefABC.LAYOUT_UINT16 - - -class _FolderDef(FolderDefABC): - LAYOUT = FolderDefABC.LAYOUT_UINT16 - - -@dataclass -class 
FileDef(FileDefABC): - LAYOUT = Struct("<5I") - - @classmethod - def unpack(cls, stream: BinaryIO): - name_rel_pos, storage_type_val_v2, data_rel_pos, length, store_length = cls.LAYOUT.unpack_stream(stream) - storage_type_map = {0: FileStorageType.Store, 16: FileStorageType.StreamCompress, 32: FileStorageType.BufferCompress} - storage_type = storage_type_map[storage_type_val_v2] - return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type) - - -FileMeta = FileMetaABC -File = FileABC -Folder = FolderABC -Drive = DriveABC -# class File(FileABC): -# meta: FileMeta - - -# @dataclass -# class Folder(FolderABC): -# folders: List[Folder] -# files: List[File] -# -# -# class Drive(DriveABC): -# folders: List[Folder] -# files: List[File] - - -@dataclass -class ArchiveMeta(ArchiveMetaABC): - file_md5: bytes - header_md5: bytes - blob_ptr: BlobPtrs # Cached for MD5 - FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" - HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" - - @staticmethod - def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): - _BUF_SIZE = 256 * KiB - hasher = hashlib.md5(eigen) - stream.seek(start) - if size is None: - while True: - buffer = stream.read(_BUF_SIZE) - hasher.update(buffer) - if len(buffer) != _BUF_SIZE: - break - else: - read = 0 - while read < size: - buffer = stream.read(min(_BUF_SIZE, size - read)) - read += len(buffer) - hasher.update(buffer) - md5 = bytes.fromhex(hasher.hexdigest()) - if md5 != expected: - raise Md5MismatchError(md5, expected) - - def validate_file_md5(self, stream: BinaryIO): - self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) - - def validate_header_md5(self, stream: BinaryIO): - self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) - - -class Archive(ArchiveABC): - meta: ArchiveMeta - # drives: List[Drive] # typing - - TOC_PTRS = _ToCPtrs - VDRIVE_DEF = _DriveDef - FOLDER_DEF = _FolderDef - FILE_DEF = FileDef - VERSION = Version(2) - META_PREFIX_LAYOUT = Struct("<16s 128s 16s 3I") - - @classmethod - def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): - files = [] - for f_def in file_defs: - meta = FileMeta(f_def.storage_type) - sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) - file = File(names[f_def.name_rel_pos], meta, None, sparse) - files.append(file) - return files - - @classmethod - def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: - encoded_name: bytes - file_md5, encoded_name, header_md5, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) - decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") - assert RSV_1 == 1 - header_pos = stream.tell() - toc_ptrs = cls.TOC_PTRS.unpack(stream) - blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) - meta = ArchiveMeta(file_md5, header_md5, blob_ptrs) - return decoded_name, meta, blob_ptrs, toc_ptrs diff --git a/src/relic/sga/v2/__init__.py b/src/relic/sga/v2/__init__.py new file mode 100644 index 0000000..f5d47c1 --- /dev/null +++ b/src/relic/sga/v2/__init__.py @@ -0,0 +1,22 @@ +from relic.sga import _abc +from relic.sga.v2._serializers import APISerializers +from relic.sga.v2.core import Archive, Drive, Folder, File, ArchiveMetadata, version + + +def _create_api(): + serializer = APISerializers() + api = 
diff --git a/src/relic/sga/v2/__init__.py b/src/relic/sga/v2/__init__.py
new file mode 100644
index 0000000..f5d47c1
--- /dev/null
+++ b/src/relic/sga/v2/__init__.py
@@ -0,0 +1,22 @@
+from relic.sga import _abc
+from relic.sga.v2._serializers import APISerializers
+from relic.sga.v2.core import Archive, Drive, Folder, File, ArchiveMetadata, version
+
+
+def _create_api():
+    serializer = APISerializers()
+    api = _abc.API(version, Archive, Drive, Folder, File, serializer)
+    return api
+
+
+API = _create_api()
+
+__all__ = [
+    "Archive",
+    "Drive",
+    "Folder",
+    "File",
+    "API",
+    "version",
+    "ArchiveMetadata"
+]
diff --git a/src/relic/sga/v2/_serializers.py b/src/relic/sga/v2/_serializers.py
new file mode 100644
index 0000000..5cf7d38
--- /dev/null
+++ b/src/relic/sga/v2/_serializers.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+from typing import BinaryIO, Dict, ClassVar, Optional
+
+from serialization_tools.structx import Struct
+
+from relic.sga import _abc, _serializers as _s
+from relic.sga._abc import FileDefABC as FileDef, Archive
+from relic.sga.v2 import core
+from relic.sga.core import MagicWord, Version
+from relic.sga.protocols import StreamSerializer, StorageType
+
+# NOTE: the Struct format strings and the FileDefSerializer boilerplate below were
+# lost to text mangling; they are reconstructed from the deleted v2.py above
+# (uint16 ToC variants) and should be treated as assumptions.
+folder_layout = Struct("<I 4H")
+folder_serializer = _s.FolderDefSerializer(folder_layout)
+
+drive_layout = Struct("<64s 64s 5H")
+drive_serializer = _s.DriveDefSerializer(drive_layout)
+
+file_layout = Struct("<5I")
+
+
+class FileDefSerializer(StreamSerializer[FileDef]):
+    # storage-type mapping carried over from the deleted v2.py
+    STORAGE2INT: ClassVar[Dict[StorageType, int]] = {StorageType.Store: 0, StorageType.StreamCompress: 16, StorageType.BufferCompress: 32}
+    INT2STORAGE: ClassVar[Dict[int, StorageType]] = {value: key for key, value in STORAGE2INT.items()}
+
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> FileDef:
+        storage_type: int
+        name_pos, storage_type, data_pos, length_on_disk, length_in_archive = self.layout.unpack_stream(stream)
+        storage_type: StorageType = self.INT2STORAGE[storage_type]
+        return FileDef(name_pos, data_pos, length_on_disk, length_in_archive, storage_type)
+
+    def pack(self, stream: BinaryIO, value: FileDef) -> int:
+        storage_type = self.STORAGE2INT[value.storage_type]
+        args = value.name_pos, storage_type, value.data_pos, value.length_on_disk, value.length_in_archive
+        return self.layout.pack_stream(stream, *args)
+
+
+file_serializer = FileDefSerializer(file_layout)
+toc_layout = Struct("<IH IH IH IH")  # NOTE: reconstructed; see the note above
+toc_header_serializer = _s.TocHeaderSerializer(toc_layout)
+
+
+class APISerializers(_abc.APISerializer):
+    # eigen seeds carried over from the deleted v2.py
+    FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3"
+    HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF"
+
+    def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> Archive:
+        MagicWord.read_magic_word(stream)
+        version = Version.unpack(stream)
+        version.assert_version_matches(self.version)
+
+        name: bytes
+        file_md5, name, header_md5, header_size, data_pos = self.layout.unpack_stream(stream)
+        header_pos = stream.tell()
+        # Seek to header; but we skip that because we are already there
+        toc_header = self.TocHeader.unpack(stream)
+        drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
+        names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+
+        if not lazy:
+            for file in files:
+                lazy_info: Optional[_abc._FileLazyInfo] = file._lazy_info
+                if lazy_info is None:
+                    raise Exception("API read files, but failed to create lazy info!")
+                else:
+                    file.data = lazy_info.read(decompress)
+                    file._lazy_info = None
+
+        name: str = name.decode("utf-16-le").rstrip("\0")  # decode before stripping the null padding
+        file_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
+        header_md5_helper = core._Md5ChecksumHelper(header_md5, stream, header_pos, header_size, eigen=self.HEADER_MD5_EIGEN)
+        metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper)
+
+        return Archive(name, metadata, drives)
+
+    def write(self, stream: BinaryIO, archive: Archive) -> int:
+        raise NotImplementedError
+
+    def __init__(self):
+        self.DriveDef = drive_serializer
+        self.FolderDef = folder_serializer
+        self.FileDef = file_serializer
+        self.TocHeader = toc_header_serializer
+        self.version = core.version
+        self.layout = Struct("<16s 128s 16s 2I")
diff --git a/src/relic/sga/v2/core.py b/src/relic/sga/v2/core.py
new file mode 100644
index 0000000..69f600d
--- /dev/null
+++ b/src/relic/sga/v2/core.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional, BinaryIO
+
+from relic.sga import _abc
+from relic.sga.core import Version
+
+version = Version(2)
+
+
+@dataclass
+class _Md5ChecksumHelper:
+    expected: 
bytes + stream: BinaryIO + start: int + size: Optional[int] = None + eigen: Optional[bytes] = None + + def validate(self, stream: BinaryIO = None) -> None: + stream = self.stream if stream is None else stream + stream.seek(self.start) + + +@dataclass +class ArchiveMetadata: + @property + def file_md5(self) -> bytes: + return self._file_md5.expected + + @property + def header_md5(self) -> bytes: + return self._header_md5.expected + + _file_md5: _Md5ChecksumHelper + _header_md5: _Md5ChecksumHelper + + +Archive = _abc.Archive[ArchiveMetadata] +Folder = _abc.Folder +File = _abc.File +Drive = _abc.Drive diff --git a/src/relic/sga/v5.py b/src/relic/sga/v5.py index 9321548..49211b8 100644 --- a/src/relic/sga/v5.py +++ b/src/relic/sga/v5.py @@ -1,138 +1,149 @@ -from __future__ import annotations - -import hashlib -from dataclasses import dataclass -from datetime import datetime, timezone -from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional - -from serialization_tools.size import KiB -from serialization_tools.structx import Struct - -from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, Md5MismatchError - - -class _ToCPtrs(ToCPtrsABC): - LAYOUT = ToCPtrsABC.LAYOUT_UINT16 - - -class _DriveDef(DriveDefABC): - LAYOUT = DriveDefABC.LAYOUT_UINT16 - - -class _FolderDef(FolderDefABC): - LAYOUT = FolderDefABC.LAYOUT_UINT16 - - -@dataclass -class FileDef(FileDefABC): - LAYOUT = Struct("<5I 2B") - # v7 Specific data - modified: datetime # Unix EPOCH - verification_type: FileVerificationType - - @classmethod - def unpack(cls, stream: BinaryIO): - # print(stream.tell()) - name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val = cls.LAYOUT.unpack_stream(stream) - modified = datetime.fromtimestamp(modified_seconds, timezone.utc) - storage_type = FileStorageType(storage_type_val) - verification_type = FileVerificationType(verification_type_val) - return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type) - - -@dataclass -class FileMeta(FileMetaABC): - modified: datetime - verification: FileVerificationType - storage: FileStorageType - - -class File(FileABC): - meta: FileMeta - - -@dataclass -class Folder(FolderABC): - folders: List[Folder] - files: List[File] - - -class Drive(DriveABC): - folders: List[Folder] - files: List[File] - - -@dataclass -class ArchiveMeta(ArchiveMetaABC): - file_md5: bytes - header_md5: bytes - blob_ptr: BlobPtrs # Cached for MD5 - unk_a: int - FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" - HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" - - @staticmethod - def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): - _BUF_SIZE = 256 * KiB - hasher = hashlib.md5(eigen) - stream.seek(start) - if size is None: - while True: - buffer = stream.read(_BUF_SIZE) - hasher.update(buffer) - if len(buffer) != _BUF_SIZE: - break - else: - read = 0 - while read < size: - buffer = stream.read(min(_BUF_SIZE, size - read)) - read += len(buffer) - hasher.update(buffer) - md5 = bytes.fromhex(hasher.hexdigest()) - if md5 != expected: - raise Md5MismatchError(md5, expected) - - def validate_file_md5(self, stream: BinaryIO): - self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) - - def 
validate_header_md5(self, stream: BinaryIO): - self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) - - -class Archive(ArchiveABC): - meta: ArchiveMeta - drives: List[Drive] # typing - - TOC_PTRS = _ToCPtrs - VDRIVE_DEF = _DriveDef - FOLDER_DEF = _FolderDef - FILE_DEF = FileDef - VERSION = Version(5) - META_PREFIX_LAYOUT = Struct("<16s 128s 16s 6I") - - _UNIQUE_UNKS: ClassVar = set() # For Analysis - - @classmethod - def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): - files = [] - for f_def in file_defs: - meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type) # TODO handle hash - sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) - file = File(names[f_def.name_rel_pos], meta, None, sparse) - files.append(file) - return files - - @classmethod - def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: - encoded_name: bytes - file_md5, encoded_name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = cls.META_PREFIX_LAYOUT.unpack_stream(stream) - decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") - assert RSV_1 == 1, RSV_1 - assert RSV_0 == 0, RSV_0 - # header_pos = stream.tell() - stream.seek(header_pos) - toc_ptrs = cls.TOC_PTRS.unpack(stream) - blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) - meta = ArchiveMeta(file_md5, header_md5, blob_ptrs, unk_a) - cls._UNIQUE_UNKS.add(unk_a) - return decoded_name, meta, blob_ptrs, toc_ptrs +# from __future__ import annotations +# +# import hashlib +# from dataclasses import dataclass +# from datetime import datetime, timezone +# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional +# +# from serialization_tools.size import KiB +# from serialization_tools.structx import Struct +# +# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, MD5MismatchError +# from relic.sga.vX import APIvX +# +# version = Version(5) +# +# +# class _ToCPtrs(ToCPtrsABC): +# LAYOUT = ToCPtrsABC.LAYOUT_UINT16 +# +# +# class _DriveDef(DriveDefABC): +# LAYOUT = DriveDefABC.LAYOUT_UINT16 +# +# +# class _FolderDef(FolderDefABC): +# LAYOUT = FolderDefABC.LAYOUT_UINT16 +# +# +# @dataclass +# class FileDef(FileDefABC): +# LAYOUT = Struct("<5I 2B") +# # v7 Specific data +# modified: datetime # Unix EPOCH +# verification_type: FileVerificationType +# +# @classmethod +# def unpack(cls, stream: BinaryIO): +# # print(stream.tell()) +# name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val = cls.LAYOUT.unpack_stream(stream) +# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) +# storage_type = FileStorageType(storage_type_val) +# verification_type = FileVerificationType(verification_type_val) +# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type) +# +# +# @dataclass +# class FileMeta(FileMetaABC): +# modified: datetime +# verification: FileVerificationType +# storage: FileStorageType +# +# +# class File(FileABC): +# meta: FileMeta +# +# +# @dataclass +# class Folder(FolderABC): +# folders: List[Folder] +# files: List[File] +# +# +# class Drive(DriveABC): +# folders: List[Folder] +# files: List[File] +# +# +# @dataclass +# class 
ArchiveMeta(ArchiveMetaABC): +# file_md5: bytes +# header_md5: bytes +# blob_ptr: BlobPtrs # Cached for MD5 +# unk_a: int +# FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" +# HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" +# +# @staticmethod +# def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): +# _BUF_SIZE = 256 * KiB +# hasher = hashlib.md5(eigen) +# stream.seek(start) +# if size is None: +# while True: +# buffer = stream.read(_BUF_SIZE) +# hasher.update(buffer) +# if len(buffer) != _BUF_SIZE: +# break +# else: +# read = 0 +# while read < size: +# buffer = stream.read(min(_BUF_SIZE, size - read)) +# read += len(buffer) +# hasher.update(buffer) +# md5 = bytes.fromhex(hasher.hexdigest()) +# if md5 != expected: +# raise MD5MismatchError(md5, expected) +# +# def validate_file_md5(self, stream: BinaryIO): +# self._validate_md5(stream, self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) +# +# def validate_header_md5(self, stream: BinaryIO): +# self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) +# +# +# class Archive(ArchiveABC): +# meta: ArchiveMeta +# drives: List[Drive] # typing +# +# TOC_PTRS = _ToCPtrs +# VDRIVE_DEF = _DriveDef +# FOLDER_DEF = _FolderDef +# FILE_DEF = FileDef +# VERSION = version +# META_PREFIX_LAYOUT = Struct("<16s 128s 16s 6I") +# +# _UNIQUE_UNKS: ClassVar = set() # For Analysis +# +# @classmethod +# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): +# files = [] +# for f_def in file_defs: +# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type) # TODO handle hash +# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) +# file = File(names[f_def.name_rel_pos], meta, None, sparse) +# files.append(file) +# return files +# +# @classmethod +# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: +# encoded_name: bytes +# file_md5, encoded_name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = cls.META_PREFIX_LAYOUT.unpack_stream(stream) +# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") +# assert RSV_1 == 1, RSV_1 +# assert RSV_0 == 0, RSV_0 +# # header_pos = stream.tell() +# stream.seek(header_pos) +# toc_ptrs = cls.TOC_PTRS.unpack(stream) +# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) +# meta = ArchiveMeta(file_md5, header_md5, blob_ptrs, unk_a) +# cls._UNIQUE_UNKS.add(unk_a) +# return decoded_name, meta, blob_ptrs, toc_ptrs +# +# +# class API(APIvX): +# version = version +# Archive = Archive +# File = File +# Folder = Folder +# Drive = Drive diff --git a/src/relic/sga/v5/__init__.py b/src/relic/sga/v5/__init__.py new file mode 100644 index 0000000..a1d8d2b --- /dev/null +++ b/src/relic/sga/v5/__init__.py @@ -0,0 +1,22 @@ +from relic.sga import _abc +from relic.sga.v5._serializers import APISerializers +from relic.sga.v5.core import Archive, Drive, Folder, File, ArchiveMetadata, version + + +def _create_api(): + serializer = APISerializers() + api = _abc.API(version, Archive, Drive, Folder, File, serializer) + return api + + +API = _create_api() + +__all__ = [ + "Archive", + "Drive", + "Folder", + "File", + "API", + "version", + "ArchiveMetadata" +] diff --git a/src/relic/sga/v5/_serializers.py b/src/relic/sga/v5/_serializers.py new file mode 100644 index 0000000..163a722 --- /dev/null +++ 
b/src/relic/sga/v5/_serializers.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import BinaryIO, ClassVar, Optional
+
+from serialization_tools.structx import Struct
+
+from relic.sga import _abc, _serializers as _s
+from relic.sga._abc import Archive
+from relic.sga.core import MagicWord, Version, MismatchError
+from relic.sga.protocols import StreamSerializer, StorageType, VerificationType
+from relic.sga.v5 import core
+
+# NOTE: the Struct format strings, the FileDefSerializer boilerplate, and its unpack
+# below were lost to text mangling; they are reconstructed from the deleted v5.py
+# above (uint16 ToC variants, "<5I 2B" file layout) and should be treated as assumptions.
+folder_layout = Struct("<I 4H")
+folder_serializer = _s.FolderDefSerializer(folder_layout)
+
+drive_layout = Struct("<64s 64s 5H")
+drive_serializer = _s.DriveDefSerializer(drive_layout)
+
+file_layout = Struct("<5I 2B")
+
+
+class FileDefSerializer(StreamSerializer[core.FileDef]):
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> core.FileDef:
+        name_pos, data_pos, length_on_disk, length_in_archive, modified_seconds, verification_type_val, storage_type_val = self.layout.unpack_stream(stream)
+        modified = datetime.fromtimestamp(modified_seconds, timezone.utc)
+        storage_type = StorageType(storage_type_val)
+        verification_type = VerificationType(verification_type_val)
+        return core.FileDef(name_pos, data_pos, length_on_disk, length_in_archive, storage_type, modified, verification_type)
+
+    def pack(self, stream: BinaryIO, value: core.FileDef) -> int:
+        modified: int = int(value.modified.timestamp())
+        storage_type = value.storage_type.value  # convert enum to value
+        verification_type = value.verification.value  # convert enum to value
+        # "<5I 2B": modified (uint32) precedes the verification/storage bytes
+        args = value.name_pos, value.data_pos, value.length_on_disk, value.length_in_archive, modified, verification_type, storage_type
+        return self.layout.pack_stream(stream, *args)
+
+
+file_serializer = FileDefSerializer(file_layout)
+toc_layout = Struct("<IH IH IH IH")  # NOTE: reconstructed; see the note above
+toc_header_serializer = _s.TocHeaderSerializer(toc_layout)
+
+
+class APISerializers(_abc.APISerializer):
+    FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3"
+    HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF"
+
+    def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> Archive:
+        MagicWord.read_magic_word(stream)
+        version = Version.unpack(stream)
+        version.assert_version_matches(self.version)
+
+        name: bytes
+        file_md5, name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = self.layout.unpack_stream(stream)
+        if (RSV_1, RSV_0) != (1, 0):
+            raise MismatchError("Reserved Field", (RSV_1, RSV_0), (1, 0))
+        # header_pos = stream.tell()
+        stream.seek(header_pos)
+        toc_header = self.TocHeader.unpack(stream)
+        drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
+        names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+
+        if not lazy:
+            for file in files:
+                lazy_info: Optional[_abc._FileLazyInfo] = file._lazy_info
+                if lazy_info is None:
+                    raise Exception("API read files, but failed to create lazy info!")
+                else:
+                    file.data = lazy_info.read(decompress)
+                    file._lazy_info = None
+
+        name: str = name.decode("utf-16-le").rstrip("\0")  # decode before stripping the null padding
+        file_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
+        header_md5_helper = core._Md5ChecksumHelper(header_md5, stream, header_pos, header_size, eigen=self.HEADER_MD5_EIGEN)
+        metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper, unk_a)
+
+        return Archive(name, metadata, drives)
+
+    def write(self, stream: BinaryIO, archive: Archive) -> int:
+        raise NotImplementedError
+
+    def __init__(self):
+        self.DriveDef = drive_serializer
+        self.FolderDef = folder_serializer
+        self.FileDef = file_serializer
+        self.TocHeader = toc_header_serializer
+        self.version = core.version
+        self.layout = Struct("<16s 128s 16s 6I")
diff --git a/src/relic/sga/v5/core.py b/src/relic/sga/v5/core.py
new file mode 100644
index 0000000..9f77fd6
--- /dev/null
+++ b/src/relic/sga/v5/core.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, BinaryIO
+
+from relic.sga import _abc
+from relic.sga._abc import FileDefABC
+from relic.sga.core import Version
+from relic.sga.protocols import VerificationType
+
+version = Version(5)
+
+
+@dataclass
+class _Md5ChecksumHelper:
+    expected: bytes
+    stream: BinaryIO
+    start: int
+    size: Optional[int] = None
+    eigen: Optional[bytes] = None
+
+    def validate(self, stream: BinaryIO = None) -> None:
+        stream = self.stream if stream is None else stream
+        stream.seek(self.start)
+
+
+@dataclass
+class 
ArchiveMetadata: + @property + def file_md5(self) -> bytes: + return self._file_md5.expected + + @property + def header_md5(self) -> bytes: + return self._header_md5.expected + + _file_md5: _Md5ChecksumHelper + _header_md5: _Md5ChecksumHelper + unk_a:int + + +@dataclass +class FileDef(FileDefABC): + modified: datetime + verification: VerificationType + + +@dataclass +class FileMetadata: + modified: datetime + verification: VerificationType + + +Archive = _abc.Archive[ArchiveMetadata] +Folder = _abc.Folder +File = _abc.File[FileMetadata] +Drive = _abc.Drive diff --git a/src/relic/sga/v7.py b/src/relic/sga/v7.py index e410070..a6308e4 100644 --- a/src/relic/sga/v7.py +++ b/src/relic/sga/v7.py @@ -1,113 +1,124 @@ -from __future__ import annotations -from dataclasses import dataclass -from datetime import datetime, timezone -from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional - -from serialization_tools.structx import Struct - -from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC - - -class _ToCPtrs(ToCPtrsABC): - LAYOUT = ToCPtrsABC.LAYOUT_UINT32 - - -class _DriveDef(DriveDefABC): - LAYOUT = DriveDefABC.LAYOUT_UINT32 - - -class _FolderDef(FolderDefABC): - LAYOUT = FolderDefABC.LAYOUT_UINT32 - - -@dataclass -class FileDef(FileDefABC): - LAYOUT = Struct("<5I 2B 2I") - # v7 Specific data - modified: datetime # Unix EPOCH - verification_type: FileVerificationType - crc: int - hash_pos: int - - @classmethod - def unpack(cls, stream: BinaryIO): - # print(stream.tell()) - name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = cls.LAYOUT.unpack_stream(stream) - modified = datetime.fromtimestamp(modified_seconds, timezone.utc) - storage_type = FileStorageType(storage_type_val) - verification_type = FileVerificationType(verification_type_val) - return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) - - -@dataclass -class FileMeta(FileMetaABC): - modified: datetime - verification: FileVerificationType - storage: FileStorageType - crc: int - hash: bytes - - -class File(FileABC): - meta: FileMeta - - -@dataclass -class Folder(FolderABC): - folders: List[Folder] - files: List[File] - - -class Drive(DriveABC): - folders: List[Folder] - files: List[File] - - -@dataclass -class ArchiveMeta(ArchiveMetaABC): - LAYOUT: ClassVar = Struct("<2I") - unk_a: int - block_size: int - - @classmethod - def unpack(cls, stream): - layout = cls.LAYOUT - args = layout.unpack_stream(stream) - return cls(*args) - - def pack(self, stream): - layout = self.LAYOUT - args = self.unk_a, self.block_size - return layout.pack_stream(stream, *args) - - -class Archive(ArchiveABC): - drives: List[Drive] # typing - TOC_PTRS = _ToCPtrs - VDRIVE_DEF = _DriveDef - FOLDER_DEF = _FolderDef - FILE_DEF = FileDef - VERSION = Version(7) - META_PREFIX_LAYOUT = Struct("<128s 3I") - - @classmethod - def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): - files = [] - for f_def in file_defs: - meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash - sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) - file = File(names[f_def.name_rel_pos], meta, None, sparse) - files.append(file) - return 
files - - @classmethod - def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: - encoded_name: bytes - encoded_name, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) - decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") - assert RSV_1 == 1 - header_pos = stream.tell() - toc_ptrs = cls.TOC_PTRS.unpack(stream) - meta = ArchiveMeta.unpack(stream) - blob_ptrs = BlobPtrs(header_pos, None, data_pos, None) - return decoded_name, meta, blob_ptrs, toc_ptrs +# from __future__ import annotations +# from dataclasses import dataclass +# from datetime import datetime, timezone +# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional +# +# from serialization_tools.structx import Struct +# +# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC +# from relic.sga.vX import APIvX +# +# version = Version(7) +# +# +# class _ToCPtrs(ToCPtrsABC): +# LAYOUT = ToCPtrsABC.LAYOUT_UINT32 +# +# +# class _DriveDef(DriveDefABC): +# LAYOUT = DriveDefABC.LAYOUT_UINT32 +# +# +# class _FolderDef(FolderDefABC): +# LAYOUT = FolderDefABC.LAYOUT_UINT32 +# +# +# @dataclass +# class FileDef(FileDefABC): +# LAYOUT = Struct("<5I 2B 2I") +# # v7 Specific data +# modified: datetime # Unix EPOCH +# verification_type: FileVerificationType +# crc: int +# hash_pos: int +# +# @classmethod +# def unpack(cls, stream: BinaryIO): +# # print(stream.tell()) +# name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = cls.LAYOUT.unpack_stream(stream) +# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) +# storage_type = FileStorageType(storage_type_val) +# verification_type = FileVerificationType(verification_type_val) +# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) +# +# +# @dataclass +# class FileMeta(FileMetaABC): +# modified: datetime +# verification: FileVerificationType +# storage: FileStorageType +# crc: int +# hash: bytes +# +# +# class File(FileABC): +# meta: FileMeta +# +# +# @dataclass +# class Folder(FolderABC): +# folders: List[Folder] +# files: List[File] +# +# +# class Drive(DriveABC): +# folders: List[Folder] +# files: List[File] +# +# +# @dataclass +# class ArchiveMeta(ArchiveMetaABC): +# LAYOUT: ClassVar = Struct("<2I") +# unk_a: int +# block_size: int +# +# @classmethod +# def unpack(cls, stream): +# layout = cls.LAYOUT +# args = layout.unpack_stream(stream) +# return cls(*args) +# +# def pack(self, stream): +# layout = self.LAYOUT +# args = self.unk_a, self.block_size +# return layout.pack_stream(stream, *args) +# +# +# class Archive(ArchiveABC): +# drives: List[Drive] # typing +# TOC_PTRS = _ToCPtrs +# VDRIVE_DEF = _DriveDef +# FOLDER_DEF = _FolderDef +# FILE_DEF = FileDef +# VERSION = Version(7) +# META_PREFIX_LAYOUT = Struct("<128s 3I") +# +# @classmethod +# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): +# files = [] +# for f_def in file_defs: +# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash +# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) +# file = File(names[f_def.name_rel_pos], meta, None, sparse) +# files.append(file) +# return files +# +# 
@classmethod
+#     def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]:
+#         encoded_name: bytes
+#         encoded_name, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream)
+#         decoded_name = encoded_name.decode("utf-16-le").rstrip("\0")
+#         assert RSV_1 == 1
+#         header_pos = stream.tell()
+#         toc_ptrs = cls.TOC_PTRS.unpack(stream)
+#         meta = ArchiveMeta.unpack(stream)
+#         blob_ptrs = BlobPtrs(header_pos, None, data_pos, None)
+#         return decoded_name, meta, blob_ptrs, toc_ptrs
+#
+#
+# class API(APIvX):
+#     version = version
+#     Archive = Archive
+#     File = File
+#     Folder = Folder
+#     Drive = Drive
diff --git a/src/relic/sga/v7/__init__.py b/src/relic/sga/v7/__init__.py
new file mode 100644
index 0000000..4d5d7ca
--- /dev/null
+++ b/src/relic/sga/v7/__init__.py
@@ -0,0 +1,22 @@
+from relic.sga import _abc
+from relic.sga.v7._serializers import APISerializers
+from relic.sga.v7.core import Archive, Drive, Folder, File, ArchiveMetadata, version
+
+
+def _create_api():
+    serializer = APISerializers()
+    api = _abc.API(version, Archive, Drive, Folder, File, serializer)
+    return api
+
+
+API = _create_api()
+
+__all__ = [
+    "Archive",
+    "Drive",
+    "Folder",
+    "File",
+    "API",
+    "version",
+    "ArchiveMetadata"
+]
diff --git a/src/relic/sga/v7/_serializers.py b/src/relic/sga/v7/_serializers.py
new file mode 100644
index 0000000..ce4526b
--- /dev/null
+++ b/src/relic/sga/v7/_serializers.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import BinaryIO, Optional
+
+from serialization_tools.structx import Struct
+
+from relic.sga import _abc, _serializers as _s
+from relic.sga._abc import Archive
+from relic.sga.core import MagicWord, Version, MismatchError
+from relic.sga.protocols import StreamSerializer, StorageType, VerificationType
+from relic.sga.v7 import core
+
+# NOTE: the Struct format strings, the FileDefSerializer boilerplate, and its unpack
+# below were lost to text mangling; they are reconstructed from the deleted v7.py
+# above (uint32 ToC variants, "<5I 2B 2I" file layout) and should be treated as assumptions.
+folder_layout = Struct("<5I")
+folder_serializer = _s.FolderDefSerializer(folder_layout)
+
+drive_layout = Struct("<64s 64s 5I")
+drive_serializer = _s.DriveDefSerializer(drive_layout)
+
+file_layout = Struct("<5I 2B 2I")
+
+
+class FileDefSerializer(StreamSerializer[core.FileDef]):
+    def __init__(self, layout: Struct):
+        self.layout = layout
+
+    def unpack(self, stream: BinaryIO) -> core.FileDef:
+        name_pos, data_pos, length_on_disk, length_in_archive, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = self.layout.unpack_stream(stream)
+        modified = datetime.fromtimestamp(modified_seconds, timezone.utc)
+        storage_type = StorageType(storage_type_val)
+        verification_type = VerificationType(verification_type_val)
+        return core.FileDef(name_pos, data_pos, length_on_disk, length_in_archive, storage_type, modified, verification_type, crc, hash_pos)
+
+    def pack(self, stream: BinaryIO, value: core.FileDef) -> int:
+        modified: int = int(value.modified.timestamp())
+        storage_type = value.storage_type.value  # convert enum to value
+        verification_type = value.verification.value  # convert enum to value
+        args = value.name_pos, value.data_pos, value.length_on_disk, value.length_in_archive, modified, verification_type, storage_type, value.crc, value.hash_pos
+        return self.layout.pack_stream(stream, *args)
+
+
+file_serializer = FileDefSerializer(file_layout)
+toc_layout = Struct("<8I")
+toc_header_serializer = _s.TocHeaderSerializer(toc_layout)
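+# NOTE (review): a minimal round-trip sketch of the StreamSerializer protocol above;
+# it leans on the reconstructed unpack, so treat it as an assumption rather than a spec:
+#
+#     from io import BytesIO
+#
+#     file_def = core.FileDef(0, 0, 8, 8, StorageType.Store,
+#                             datetime.fromtimestamp(0, timezone.utc),
+#                             VerificationType.None_, crc=0, hash_pos=0)
+#     with BytesIO() as stream:
+#         file_serializer.pack(stream, file_def)
+#         stream.seek(0)
+#         assert file_serializer.unpack(stream) == file_def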
+
+
+class APISerializers(_abc.APISerializer):
+    def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> Archive:
+        MagicWord.read_magic_word(stream)
+        version = Version.unpack(stream)
+        version.assert_version_matches(self.version)
+
+        name: bytes
+        name, header_size, data_pos, RSV_1 = self.layout.unpack_stream(stream)
+        if RSV_1 != 1:
+            raise MismatchError("Reserved Field", RSV_1, 1)
+        header_pos = stream.tell()
+        # stream.seek(header_pos)
+        toc_header = self.TocHeader.unpack(stream)
+        unk_a, block_size = self.metadata_layout.unpack_stream(stream)
+        drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
+        names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+
+        if not lazy:
+            for file in files:
+                lazy_info: Optional[_abc._FileLazyInfo] = file._lazy_info
+                if lazy_info is None:
+                    raise Exception("API read files, but failed to create lazy info!")
+                else:
+                    file.data = lazy_info.read(decompress)
+                    file._lazy_info = None
+
+        name: str = name.decode("utf-16-le").rstrip("\0")  # decode before stripping the null padding
+        metadata = core.ArchiveMetadata(unk_a, block_size)
+
+        return Archive(name, metadata, drives)
+
+    def write(self, stream: BinaryIO, archive: Archive) -> int:
+        raise NotImplementedError
+
+    def __init__(self):
+        self.DriveDef = drive_serializer
+        self.FolderDef = folder_serializer
+        self.FileDef = file_serializer
+        self.TocHeader = toc_header_serializer
+        self.version = core.version
+        self.layout = Struct("<128s 3I")
+        self.metadata_layout = Struct("<2I")
diff --git a/src/relic/sga/v7/core.py b/src/relic/sga/v7/core.py
new file mode 100644
index 0000000..bc252ad
--- /dev/null
+++ b/src/relic/sga/v7/core.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, BinaryIO
+
+from relic.sga import _abc
+from relic.sga._abc import FileDefABC
+from relic.sga.core import Version
+from relic.sga.protocols import VerificationType
+
+version = Version(7)
+
+
+@dataclass
+class ArchiveMetadata:
+    unk_a: int
+    block_size: int
+
+
+@dataclass
+class FileDef(FileDefABC):
+    modified: datetime
+    verification: VerificationType
+    crc: int
+    hash_pos: int
+
+
+@dataclass
+class FileMetadata:
+    modified: datetime
+    verification: VerificationType
+    crc: int
+    hash_pos: int
+
+
+Archive = _abc.Archive[ArchiveMetadata]
+Folder = _abc.Folder
+File = _abc.File[FileMetadata]
+Drive = _abc.Drive
diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py
index 3c3b91c..aef0972 100644
--- a/src/relic/sga/v9.py
+++ b/src/relic/sga/v9.py
@@ -1,105 +1,116 @@
-from __future__ import annotations
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional
-
-from serialization_tools.structx import Struct
-
-from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC
-
-
-class _ToCPtrs(ToCPtrsABC):
-    LAYOUT = ToCPtrsABC.LAYOUT_UINT32
-
-
-class _DriveDef(DriveDefABC):
-    LAYOUT = DriveDefABC.LAYOUT_UINT32
-
-
-class _FolderDef(FolderDefABC):
-    LAYOUT = FolderDefABC.LAYOUT_UINT32
-
-
-@dataclass
-class FileDef(FileDefABC):
-    LAYOUT = Struct("<2I Q 3I 2B I")
-    # v7 Specific data
-    modified: datetime  # Unix EPOCH
-    verification_type: FileVerificationType
-    crc: int
-    hash_pos: int
-
-    @classmethod
-    def unpack(cls, stream: BinaryIO):
-        name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc = cls.LAYOUT.unpack_stream(stream)
-        modified = datetime.fromtimestamp(modified_seconds, timezone.utc)
-        storage_type = FileStorageType(storage_type_val)
-        verification_type = FileVerificationType(verification_type_val)
-        return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos)
-
-
-@dataclass
-class FileMeta(FileMetaABC):
-    modified: datetime
-    verification: FileVerificationType
-    storage: FileStorageType
-    crc: int
-    hash: bytes
-
-
-class File(FileABC):
-    meta: FileMeta
-
-
-@dataclass
-class Folder(FolderABC):
-    folders: List[Folder]
-    files: List[File]
-
-
-class Drive(DriveABC):
-    folders: List[Folder]
-    files: List[File]
-
-
-@dataclass
-class ArchiveMeta(ArchiveMetaABC):
-    sha_256: bytes
-    unk_a: int
-    unk_b: int
-    block_size: int
-
-
-class 
Archive(ArchiveABC): - drives: List[Drive] # typing - TOC_PTRS = _ToCPtrs - VDRIVE_DEF = _DriveDef - FOLDER_DEF = _FolderDef - FILE_DEF = FileDef - VERSION = Version(9) - META_PREFIX_LAYOUT = Struct("<128s QIQQ I 256s") - META_POSTFIX_LAYOUT = Struct("<3I") - NAME_BUFFER_USES_COUNT = False - - @classmethod - def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): - files = [] - for f_def in file_defs: - meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash - sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) - file = File(names[f_def.name_rel_pos], meta, None, sparse) - files.append(file) - return files - - @classmethod - def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: - encoded_name: bytes - encoded_name, header_pos, header_size, data_pos, data_size, RSV_1, sha_256 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) - decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") - assert RSV_1 == 1, RSV_1 - stream.seek(header_pos) - toc_ptrs = cls.TOC_PTRS.unpack(stream) - unk_a, unk_b, block_size = cls.META_POSTFIX_LAYOUT.unpack_stream(stream) - meta = ArchiveMeta(sha_256,unk_a,unk_b, block_size) - blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, data_size) - return decoded_name, meta, blob_ptrs, toc_ptrs +# from __future__ import annotations +# from dataclasses import dataclass +# from datetime import datetime, timezone +# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional +# +# from serialization_tools.structx import Struct +# +# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC +# from relic.sga.vX import APIvX +# +# version = Version(9) +# +# class _ToCPtrs(ToCPtrsABC): +# LAYOUT = ToCPtrsABC.LAYOUT_UINT32 +# +# +# class _DriveDef(DriveDefABC): +# LAYOUT = DriveDefABC.LAYOUT_UINT32 +# +# +# class _FolderDef(FolderDefABC): +# LAYOUT = FolderDefABC.LAYOUT_UINT32 +# +# +# @dataclass +# class FileDef(FileDefABC): +# LAYOUT = Struct("<2I Q 3I 2B I") +# # v7 Specific data +# modified: datetime # Unix EPOCH +# verification_type: FileVerificationType +# crc: int +# hash_pos: int +# +# @classmethod +# def unpack(cls, stream: BinaryIO): +# name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds,verification_type_val, storage_type_val, crc = cls.LAYOUT.unpack_stream(stream) +# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) +# storage_type = FileStorageType(storage_type_val) +# verification_type = FileVerificationType(verification_type_val) +# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) +# +# +# @dataclass +# class FileMeta(FileMetaABC): +# modified: datetime +# verification: FileVerificationType +# storage: FileStorageType +# crc: int +# hash: bytes +# +# +# class File(FileABC): +# meta: FileMeta +# +# +# @dataclass +# class Folder(FolderABC): +# folders: List[Folder] +# files: List[File] +# +# +# class Drive(DriveABC): +# folders: List[Folder] +# files: List[File] +# +# +# @dataclass +# class ArchiveMeta(ArchiveMetaABC): +# sha_256: bytes +# unk_a: int +# unk_b: int +# block_size: int +# +# +# class Archive(ArchiveABC): +# drives: List[Drive] # typing +# TOC_PTRS = _ToCPtrs +# VDRIVE_DEF = _DriveDef +# 
FOLDER_DEF = _FolderDef +# FILE_DEF = FileDef +# VERSION = version +# META_PREFIX_LAYOUT = Struct("<128s QIQQ I 256s") +# META_POSTFIX_LAYOUT = Struct("<3I") +# NAME_BUFFER_USES_COUNT = False +# +# @classmethod +# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): +# files = [] +# for f_def in file_defs: +# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash +# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) +# file = File(names[f_def.name_rel_pos], meta, None, sparse) +# files.append(file) +# return files +# +# @classmethod +# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: +# encoded_name: bytes +# encoded_name, header_pos, header_size, data_pos, data_size, RSV_1, sha_256 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) +# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") +# assert RSV_1 == 1, RSV_1 +# stream.seek(header_pos) +# toc_ptrs = cls.TOC_PTRS.unpack(stream) +# unk_a, unk_b, block_size = cls.META_POSTFIX_LAYOUT.unpack_stream(stream) +# meta = ArchiveMeta(sha_256,unk_a,unk_b, block_size) +# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, data_size) +# return decoded_name, meta, blob_ptrs, toc_ptrs +# +# def _pack_meta(self,): +# +# class API(APIvX): +# version = version +# Archive = Archive +# File = File +# Folder = Folder +# Drive = Drive \ No newline at end of file diff --git a/src/relic/sga/vX.py b/src/relic/sga/vX.py index 61ccdc9..6f2e03b 100644 --- a/src/relic/sga/vX.py +++ b/src/relic/sga/vX.py @@ -1,37 +1,36 @@ -from types import ModuleType -from typing import Type, Protocol - -from relic.sga.core import ArchiveABC, Version - - -class APIvX(Protocol): - """ - Allows us to have a TYPED OBJECT with required types for each version - - """ - - - version:Version - # Archive - Archive: Type[ArchiveABC] - # ArchiveHeader: Type[protocols.ArchiveHeader] - # # Table Of Contents - # ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC] - # ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC] - # # Files - # FileHeader: Type[abc_.FileHeaderABC] - # File: Type[protocols.File] - # # Folders - # FolderHeader: Type[abc_.FolderHeaderABC] - # Folder: Type[protocols.Folder] - # # VDrive - # VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC] - # VirtualDrive: Type[protocols.VirtualDrive] - - -"""Modules implementing vX should define all of the following attributes""" -required_attrs = APIvX.__annotations__.keys() - - -def is_module_api(module: ModuleType): - return all(hasattr(module, attr) for attr in required_attrs) +# from types import ModuleType +# from typing import Type, Protocol +# +# from relic.sga.protocols import Archive, File, Folder, Drive +# +# +# class APIvX(Protocol): +# """ +# Allows us to have a TYPED OBJECT with required types for each version +# +# """ +# +# version: Version +# # Archive +# Archive: Type[Archive] +# # ArchiveHeader: Type[protocols.ArchiveHeader] +# # # Table Of Contents +# # ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC] +# # ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC] +# # # Files +# # FileHeader: Type[abc_.FileHeaderABC] +# File: Type[FileABC] +# # # Folders +# # FolderHeader: Type[abc_.FolderHeaderABC] +# Folder: Type[FolderABC] +# # # VDrive +# # VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC] +# Drive: Type[DriveABC] +# +# +# """Modules implementing vX should define all of the following 
attributes""" +# required_attrs = APIvX.__annotations__.keys() +# +# +# def is_module_api(module: ModuleType): +# return all(hasattr(module, attr) for attr in required_attrs) diff --git a/src/relic/sga/writer_tester.py b/src/relic/sga/writer_tester.py new file mode 100644 index 0000000..28f1c92 --- /dev/null +++ b/src/relic/sga/writer_tester.py @@ -0,0 +1,36 @@ +# # To write an Archive +# import zlib +# from abc import abstractmethod +# from io import BytesIO +# from typing import List, BinaryIO, Dict, Tuple, Type, Optional +# +# from relic.sga import v9 +# from relic.sga.core import DriveABC, DriveDefABC, FolderDefABC, ArchivePathable, FileDefABC, FolderABC, FileABC, FileStorageType, FileMetaABC, ArchiveABC +# +# +# +# +# if __name__ == "__main__": +# a = v9.Archive("Test", None, []) +# drive = DriveABC([], [], "data", "Test Archvie") +# a.drives = [drive] +# drive_folder = FolderABC("drive-folder-a", [], [], _parent_path=drive) +# drive_file = FileABC("drive-file-buffer-comp-b.raw", FileMetaABC(FileStorageType.BufferCompress), b"This is a test 'buffer compress' file!", _parent_path=drive) +# drive.folders = [drive_folder] +# drive.files = [drive_file] +# +# drive_folder_folder = FolderABC("drive-folder-folder-c", [], [], _parent_path=drive_folder) +# drive_folder_file_d = FileABC("drive-folder-file-stream-comp-d.raw", FileMetaABC(FileStorageType.StreamCompress), b"This is a test 'stream compress' file!", _parent_path=drive_folder) +# drive_folder_file_e = FileABC("drive-folder-file-store-e.raw", FileMetaABC(FileStorageType.StreamCompress), b"This is a test 'stream compress' file!", _parent_path=drive_folder) +# drive_folder.folders = [drive_folder_folder] +# drive_folder.files = [drive_folder_file_d, drive_folder_file_e] +# +# with BytesIO() as name_stream: +# with BytesIO() as data_stream: +# writer = ArchiveFlattener(name_stream, data_stream) +# writer.flatten_archive(a) +# name_stream.seek(0) +# data_stream.seek(0) +# names = name_stream.read() +# data = data_stream.read() +# _ = None diff --git a/src/scripts/universal/sga/common.py b/src/scripts/universal/sga/common.py index 3c9484a..2a12f2e 100644 --- a/src/scripts/universal/sga/common.py +++ b/src/scripts/universal/sga/common.py @@ -5,7 +5,7 @@ from serialization_tools.walkutil import blacklisted -from relic.sga_old.common import ArchiveMagicWord +from relic.sga.core import MagicWord from scripts.universal.common import print_error, print_wrote, print_reading, PrintOptions, SharedExtractorParser SharedSgaParser = argparse.ArgumentParser(parents=[SharedExtractorParser], add_help=False) @@ -29,7 +29,7 @@ def is_sga(input_file: str, ext: Union[str, List[str]] = None, magic: bool = Fal # Make sure magic word is present if magic: with open(input_file, "rb") as check_handle: - return ArchiveMagicWord.check_magic_word(check_handle) + return MagicWord.check_magic_word(check_handle) return True diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py index 425bff4..fd619c0 100644 --- a/src/scripts/universal/sga/unpack.py +++ b/src/scripts/universal/sga/unpack.py @@ -3,8 +3,8 @@ from pathlib import Path from typing import Dict -import relic.sga_old.common -import relic.sga_old.io +from relic.sga.core import FileABC +from relic.sga.apis import read_archive from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser from scripts.universal.sga.common import get_runner @@ -26,37 +26,39 @@ def extract_args(args: argparse.Namespace) -> Dict: def unpack_archive(in_path: str, 
out_path: str, print_opts: PrintOptions = None, prepend_archive_path: bool = True, indent_level: int = 0, **kwargs): out_path = Path(out_path) with open(in_path, "rb") as in_handle: - archive = relic.sga.io.unpack_archive(in_handle) + archive = read_archive(in_handle, True) archive_name = splitext(basename(in_path))[0] - with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream: - print_any(f"Unpacking \"{archive_name}\"...", indent_level, print_opts) - for _, _, _, files in archive.walk(): - for file in files: - try: - relative_file_path = file.full_path - - if ':' in relative_file_path.parts[0]: - relative_file_path = str(relative_file_path).replace(":", "") - - rel_out_path = Path(out_path) - if prepend_archive_path: - rel_out_path /= archive_name - - rel_out_path /= relative_file_path - - rel_out_path.parent.mkdir(parents=True, exist_ok=True) - print_any(f"Reading \"{relative_file_path}\"...", indent_level + 1, print_opts) - with open(rel_out_path, "wb") as out_handle: - data = file.read_data(data_stream, True) - out_handle.write(data) - print_any(f"Writing \"{rel_out_path}\"...", indent_level + 2, print_opts) - except KeyboardInterrupt: + # with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream: + print_any(f"Unpacking \"{archive_name}\"...", indent_level, print_opts) + for _, _, _, files in archive.walk(): + for file in files: + file: FileABC + try: + relative_file_path = file.path + + # Cant use drive since our 'drive' isn't one letter + if ':' in relative_file_path.parts[0]: + relative_file_path = str(relative_file_path).replace(":", "") # Valid on windows systems, on posix; idk + + rel_out_path = Path(out_path) + if prepend_archive_path: + rel_out_path /= archive_name + + rel_out_path /= relative_file_path + + rel_out_path.parent.mkdir(parents=True, exist_ok=True) + print_any(f"Reading \"{relative_file_path}\"...", indent_level + 1, print_opts) + with open(rel_out_path, "wb") as out_handle: + file.read_data(in_handle) + out_handle.write(file.data) + print_any(f"Writing \"{rel_out_path}\"...", indent_level + 2, print_opts) + except KeyboardInterrupt: + raise + except BaseException as e: + if not print_opts or print_opts.error_fail: raise - except BaseException as e: - if not print_opts or print_opts.error_fail: - raise - else: - print_error(e, indent_level, print_opts) + else: + print_error(e, indent_level, print_opts) Runner = get_runner(unpack_archive, extract_args) diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py index de90e4a..ba01a51 100644 --- a/tests/relic/sga/archive/test_archive.py +++ b/tests/relic/sga/archive/test_archive.py @@ -3,30 +3,28 @@ import pytest -from relic.sga_old import protocols -from relic.sga_old.abc_old_ import ArchiveABC -from relic.sga_old.protocols import ArchiveWalk +from relic.sga.core import ArchiveWalk, ArchiveABC as Archive from tests.helpers import TF from tests.relic.sga.datagen import DowII, DowI, DowIII -def _ARCHIVE_WALK_SAMPLE(a: protocols.Archive) -> ArchiveWalk: +def _ARCHIVE_WALK_SAMPLE(a: Archive) -> ArchiveWalk: d = a.drives[0] - sfs = d.sub_folders + sfs = d.folders dfs = d.files yield d, None, sfs, dfs yield d, sfs[0], [], sfs[0].files class ArchiveTests: - def assert_equal(self, expected: ArchiveABC, result: ArchiveABC, sparse: bool): - assert expected.header == result.header + def assert_equal(self, expected: Archive, result: Archive, sparse: bool): + assert expected.meta == result.meta if sparse: assert result._sparse # TODO @abstractmethod - def 
test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): + def test_walk(self, archive: Archive, expected: ArchiveWalk): archive_walk = archive.walk() for (a_vdrive, a_folder, a_folders, a_files), (e_vdrive, e_folder, e_folders, e_files) in zip(archive_walk, expected): assert a_vdrive == e_vdrive @@ -35,7 +33,7 @@ def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): assert a_files == e_files @abstractmethod - def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + def test_unpack(self, stream_data: bytes, expected: Archive): for sparse in TF: with BytesIO(stream_data) as stream: archive = expected.__class__.unpack(stream, expected.header, sparse) @@ -43,7 +41,7 @@ def test_unpack(self, stream_data: bytes, expected: ArchiveABC): self.assert_equal(expected, archive, sparse) @abstractmethod - def test_pack(self, archive: ArchiveABC, expected: bytes): + def test_pack(self, archive: Archive, expected: bytes): for write_magic in TF: try: with BytesIO() as stream: @@ -64,17 +62,17 @@ def fast_gen_dow1_archive(*args): class TestArchiveV2(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + def test_unpack(self, stream_data: bytes, expected: Archive): super().test_unpack(stream_data, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) - def test_pack(self, archive: ArchiveABC, expected: bytes): + def test_pack(self, archive: Archive, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW1_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW1_ARCHIVE))]) - def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): + def test_walk(self, archive: Archive, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -88,17 +86,17 @@ def fast_gen_dow2_archive(*args): class TestArchiveV5(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + def test_unpack(self, stream_data: bytes, expected: Archive): super().test_unpack(stream_data, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) - def test_pack(self, archive: ArchiveABC, expected: bytes): + def test_pack(self, archive: Archive, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW2_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW2_ARCHIVE))]) - def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): + def test_walk(self, archive: Archive, expected: ArchiveWalk): super().test_walk(archive, expected) @@ -112,15 +110,15 @@ def fast_gen_dow3_archive(*args): class TestArchiveV9(ArchiveTests): @pytest.mark.parametrize(["stream_data", "expected"], [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, expected: ArchiveABC): + def test_unpack(self, stream_data: bytes, expected: Archive): super().test_unpack(stream_data, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) - def test_pack(self, archive: ArchiveABC, expected: bytes): + def test_pack(self, archive: Archive, expected: bytes): super().test_pack(archive, expected) @pytest.mark.parametrize(["archive", "expected"], [(DOW3_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW3_ARCHIVE))]) - def test_walk(self, archive: ArchiveABC, expected: ArchiveWalk): + def test_walk(self, archive: 
Archive, expected: ArchiveWalk): super().test_walk(archive, expected) diff --git a/tests/relic/sga/archive/test_archive_header.py b/tests/relic/sga/archive/test_archive_header.py index 4547c85..e21d536 100644 --- a/tests/relic/sga/archive/test_archive_header.py +++ b/tests/relic/sga/archive/test_archive_header.py @@ -1,203 +1,203 @@ -from abc import abstractmethod -from io import BytesIO -from typing import List, Type - -import pytest -from serialization_tools.ioutil import WindowPtr, Ptr -from serialization_tools.size import KiB, MiB, GiB - -from relic.common import Version -from relic.sga_old import protocols as proto, v2, v5, v9 -from relic.sga_old.checksums import gen_md5_checksum, validate_md5_checksum -from relic.sga_old.common import ArchiveVersion -from tests.helpers import TF -from tests.relic.sga.datagen import DowI, DowII, DowIII - - -class ArchiveHeaderTests: - @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_validate_checksums(self, archive: bytes, cls: Type[proto.ArchiveHeader]): - for fast in TF: - for _assert in TF: - with BytesIO(archive) as stream: - stream.seek(12) # skip magic/version - archive_header = cls.unpack(stream) - archive_header.validate_checksums(stream, fast=fast, _assert=_assert) - - @abstractmethod # Trick PyCharm into requiring us to redefine this - def test_version(self, archive: proto.ArchiveHeader, expected: Version): - assert archive.version == expected - - @abstractmethod - def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): - with BytesIO(buffer) as stream: - unpacked = expected.__class__.unpack(stream) - assert expected == unpacked - - @abstractmethod - def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): - with BytesIO() as stream: - written = inst.pack(stream) - stream.seek(0) - packed = stream.read() - assert len(packed) == written - assert expected == packed - - -_KNOWN_EIGEN = b'06BEF126-4E3C-48D3-8D2E-430BF125B54F' -_KNOWN_DATA = b'\xf3\x0cGjx:"\xb7O\x89\xc1\x82H\xb2\xa1\xaa\x82-\xe4\\{\xe2\x905\x0c\xdbT\x0c\x82\xa3y\xdat\xd5\xdf\xb7\x04\x1e\xd0\xaa\xf6\xc9|U%\xf7\x0c\xb9\x92\xc9\xbf\xa9\xa3\xaaQ]\xb6\x8c\x10\x87\xc3r\xe3\x89\x16T\x936\xc5l/(\xbd\xbc\x08\xa2\x9b`|\xec\xd5\xf3\xfd\x83\x85\xadHY\xf4U\xb8\x85\x92\xcd\x1d\xc1\xa2\x0f\xbam!\xd5\xacnft>\'\xf0\x12\x9c\x0c\x1c{\xa2\x15VI\xb0\x13\x89\xde\x889\xdc\x15_\xc8\\\x97\x06\xa7\xde\xc0p\xf9o\t\xd3_\x9d\xa7@.\x81\xed\xdd\x13\x9b m9\xf5\x1bV\xc3\xe0\xd4@\x99\xa2\x8aGr\x04\xff\x05\xedIs\x15\t0\x98G\x87O\x9c\xa1\xd2\tcS\xb3\x1eI\xf5\xe3Qp\xe0\xd0m\xbf;\xfb\x856\xa7\\\xb8\xad\x19\xc1\xa3\xaf+\xd4\x08\xd5Y4\x87p|p`dQ\x1c|>is\x17;\xa6\x8d\xa2\xa4\xdc\xe0\xd6\xaf\xc3\x93\xf59\x9a[\x19J\xc88\xb8\xfd/\xe4\xc6J\x8c\xddCY&\x8f' -_KNOWN_BAD_DATA = b'\xe9F{\x17\xc2\x118\xe4\x0c\xbd\x07\xf2\x07\x03:\xee%\xabx<\xc3\xb5\x98\x7f\xa6[\xc53+Y]t' -_KNOWN_DATA_MD5 = b'\x0f\xd3\xc3|\xb2d\x16U\xfd\xc2<\x98\x0b\xf1\x91\xde' - - -@pytest.mark.parametrize( - ["stream_data", "eigen", "md5_checksum"], - [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5)] -) -def test_gen_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, buffer_sizes: List[int] = None, ptr: Ptr = None): - buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes - ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr - for buffer_size in buffer_sizes: - with BytesIO(stream_data) as stream: - result = gen_md5_checksum(stream, eigen, buffer_size, ptr) - assert md5_checksum == result - - -@pytest.mark.parametrize( - ["stream_data", "eigen", "md5_checksum", 
"fail_expected"], - [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, False), - (_KNOWN_BAD_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, True)] -) -def test_validate_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, fail_expected: bool, ptr: WindowPtr = None, buffer_sizes: List[int] = None, ): - buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes - ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr - for _assert in TF: - for buffer_size in buffer_sizes: - try: - with BytesIO(stream_data) as stream: - result = validate_md5_checksum(stream, ptr, eigen, md5_checksum, buffer_size, _assert) - # Own lines to make assertions clearer - except AssertionError as e: - if not fail_expected: # MD5 mismatch; if fail_expected we - raise e - else: - if fail_expected: - # Invalid and should have asserted - assert not result and not _assert - else: - assert result - - -# Not garunteed to be a valid header - -def fast_dow1_archive_header(name, toc_pos, bad_magic: bytes): - _AB = 0, 120 # Random values - return DowI.gen_archive_header(name, *_AB, toc_pos=toc_pos), DowI.gen_archive_header_buffer(name, *_AB), DowI.gen_archive_header_buffer(name, *_AB, magic=bad_magic) - - -DOW1_HEADER, DOW1_HEADER_DATA, DOW1_HEADER_DATA_BAD_MAGIC = fast_dow1_archive_header("Dawn Of War 1 Test Header", 180, b"deadbeef") -# By not writing Magic/Archive TOC-Pos must be changed in the generated DowIIArchiveHeader; the buffers (should be) identical given the same input -DOW1_HEADER_INNER, DOW1_HEADER_INNER_DATA, _ = fast_dow1_archive_header("Dawn Of War 1 Test Header (Inner Pack)", 168, b"deaddead") -DOW1_ARCHIVE_BUFFER = DowI.gen_sample_archive_buffer("Dawn Of War 1 Test Archive", "Tests", "Dow1 Header Tests.txt", b"You thought this was a test, but it was me, DIO!") - -HDR_START = 12 # Most logic now doesn't handle Magic + Version - - -class TestDowIArchiveHeader(ArchiveHeaderTests): - @pytest.mark.parametrize( - ["archive", "cls"], - [(DOW1_ARCHIVE_BUFFER, v2.ArchiveHeader)]) - def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): - super().test_validate_checksums(archive, cls) - - @pytest.mark.parametrize( - ["expected", "inst"], - [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] - ) - def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) - - @pytest.mark.parametrize( - ["buffer", "expected"], - [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] - ) - def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): - super().test_unpack(buffer, expected) - - @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, v2.version)]) - def test_version(self, archive: proto.ArchiveHeader, expected: Version): - super().test_version(archive, expected) - - -# Not garunteed to be a valid header - - -def fast_dow2_archive_header(name): - _ABC = 0, 0, 0 - return DowII.gen_archive_header(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC) - - -DOW2_HEADER, DOW2_HEADER_DATA = fast_dow2_archive_header("Dawn Of War 2 Test Header") -DOW2_ARCHIVE_BUFFER = DowII.gen_sample_archive_buffer("Dawn Of War 2 Test Archive", "Dow2 Tests", "Imperial Propoganda.txt", b"By the Emperor, we're ready to unleash eleven barrels, m' lord, sir!") - - -class TestDowIIArchiveHeader(ArchiveHeaderTests): - @pytest.mark.parametrize( - ["buffer", "expected"], - [(DOW2_HEADER_DATA[HDR_START:], DOW2_HEADER)], - ) - def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): - super().test_unpack(buffer, expected) - - 
@pytest.mark.parametrize( - ["inst", "expected"], - [(DOW2_HEADER, DOW2_HEADER_DATA[HDR_START:])]) - def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) - - @pytest.mark.parametrize( - ["archive", "cls"], - [(DOW2_ARCHIVE_BUFFER, v5.ArchiveHeader)], - ) - def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]): - super().test_validate_checksums(archive, cls) - - @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, v5.version)]) - def test_version(self, archive: proto.ArchiveHeader, expected: Version): - super().test_version(archive, expected) - - -def fast_dow3_archive_header(name, bad_magic: bytes): - _ABCD = 0, 1, 2, 3 - return DowIII.gen_archive_header(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD, magic=bad_magic) - - -DOW3_HEADER, DOW3_HEADER_DATA, DOW3_HEADER_DATA_BAD_MAGIC = fast_dow3_archive_header("Dawn Of War 3 Test Header", b" Marine!") # Big Brain Pun in ` Marine!` - - -class TestDowIIIArchiveHeader(ArchiveHeaderTests): - @pytest.mark.parametrize( - ["archive", "cls"], - [(None, v9.ArchiveHeader)]) - def test_validate_checksums(self, archive: bytes, cls: Type[v9.ArchiveHeader]): - for fast in TF: - for _assert in TF: - # HACK but if it fails it means logic has changed - assert cls.validate_checksums(None, None, fast=fast, _assert=_assert) - - @pytest.mark.parametrize( - ["buffer", "expected"], - [(DOW3_HEADER_DATA[HDR_START:], DOW3_HEADER)], - ) - def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): - super().test_unpack(buffer, expected) - - @pytest.mark.parametrize( - ["inst", "expected"], - [(DOW3_HEADER, DOW3_HEADER_DATA[HDR_START:])]) - def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): - super().test_pack(inst, expected) - - @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, v9.version)]) - def test_version(self, archive: proto.ArchiveHeader, expected: Version): - super().test_version(archive, expected) +# from abc import abstractmethod +# from io import BytesIO +# from typing import List, Type +# +# import pytest +# from serialization_tools.ioutil import WindowPtr, Ptr +# from serialization_tools.size import KiB, MiB, GiB +# +# from relic.common import Version +# from relic.sga_old import protocols as proto, v2, v5, v9 +# from relic.sga_old.checksums import gen_md5_checksum, validate_md5_checksum +# from relic.sga_old.common import ArchiveVersion +# from tests.helpers import TF +# from tests.relic.sga.datagen import DowI, DowII, DowIII +# +# +# class ArchiveHeaderTests: +# @abstractmethod # Trick PyCharm into requiring us to redefine this +# def test_validate_checksums(self, archive: bytes, cls: Type[proto.ArchiveHeader]): +# for fast in TF: +# for _assert in TF: +# with BytesIO(archive) as stream: +# stream.seek(12) # skip magic/version +# archive_header = cls.unpack(stream) +# archive_header.validate_checksums(stream, fast=fast, _assert=_assert) +# +# @abstractmethod # Trick PyCharm into requiring us to redefine this +# def test_version(self, archive: proto.ArchiveHeader, expected: Version): +# assert archive.version == expected +# +# @abstractmethod +# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): +# with BytesIO(buffer) as stream: +# unpacked = expected.__class__.unpack(stream) +# assert expected == unpacked +# +# @abstractmethod +# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): +# with BytesIO() as stream: +# written = inst.pack(stream) 
+# stream.seek(0) +# packed = stream.read() +# assert len(packed) == written +# assert expected == packed +# +# +# _KNOWN_EIGEN = b'06BEF126-4E3C-48D3-8D2E-430BF125B54F' +# _KNOWN_DATA = b'\xf3\x0cGjx:"\xb7O\x89\xc1\x82H\xb2\xa1\xaa\x82-\xe4\\{\xe2\x905\x0c\xdbT\x0c\x82\xa3y\xdat\xd5\xdf\xb7\x04\x1e\xd0\xaa\xf6\xc9|U%\xf7\x0c\xb9\x92\xc9\xbf\xa9\xa3\xaaQ]\xb6\x8c\x10\x87\xc3r\xe3\x89\x16T\x936\xc5l/(\xbd\xbc\x08\xa2\x9b`|\xec\xd5\xf3\xfd\x83\x85\xadHY\xf4U\xb8\x85\x92\xcd\x1d\xc1\xa2\x0f\xbam!\xd5\xacnft>\'\xf0\x12\x9c\x0c\x1c{\xa2\x15VI\xb0\x13\x89\xde\x889\xdc\x15_\xc8\\\x97\x06\xa7\xde\xc0p\xf9o\t\xd3_\x9d\xa7@.\x81\xed\xdd\x13\x9b m9\xf5\x1bV\xc3\xe0\xd4@\x99\xa2\x8aGr\x04\xff\x05\xedIs\x15\t0\x98G\x87O\x9c\xa1\xd2\tcS\xb3\x1eI\xf5\xe3Qp\xe0\xd0m\xbf;\xfb\x856\xa7\\\xb8\xad\x19\xc1\xa3\xaf+\xd4\x08\xd5Y4\x87p|p`dQ\x1c|>is\x17;\xa6\x8d\xa2\xa4\xdc\xe0\xd6\xaf\xc3\x93\xf59\x9a[\x19J\xc88\xb8\xfd/\xe4\xc6J\x8c\xddCY&\x8f' +# _KNOWN_BAD_DATA = b'\xe9F{\x17\xc2\x118\xe4\x0c\xbd\x07\xf2\x07\x03:\xee%\xabx<\xc3\xb5\x98\x7f\xa6[\xc53+Y]t' +# _KNOWN_DATA_MD5 = b'\x0f\xd3\xc3|\xb2d\x16U\xfd\xc2<\x98\x0b\xf1\x91\xde' +# +# +# @pytest.mark.parametrize( +# ["stream_data", "eigen", "md5_checksum"], +# [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5)] +# ) +# def test_gen_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, buffer_sizes: List[int] = None, ptr: Ptr = None): +# buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes +# ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr +# for buffer_size in buffer_sizes: +# with BytesIO(stream_data) as stream: +# result = gen_md5_checksum(stream, eigen, buffer_size, ptr) +# assert md5_checksum == result +# +# +# @pytest.mark.parametrize( +# ["stream_data", "eigen", "md5_checksum", "fail_expected"], +# [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, False), +# (_KNOWN_BAD_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, True)] +# ) +# def test_validate_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, fail_expected: bool, ptr: WindowPtr = None, buffer_sizes: List[int] = None, ): +# buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes +# ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr +# for _assert in TF: +# for buffer_size in buffer_sizes: +# try: +# with BytesIO(stream_data) as stream: +# result = validate_md5_checksum(stream, ptr, eigen, md5_checksum, buffer_size, _assert) +# # Own lines to make assertions clearer +# except AssertionError as e: +# if not fail_expected: # MD5 mismatch; if fail_expected we +# raise e +# else: +# if fail_expected: +# # Invalid and should have asserted +# assert not result and not _assert +# else: +# assert result +# +# +# # Not garunteed to be a valid header +# +# def fast_dow1_archive_header(name, toc_pos, bad_magic: bytes): +# _AB = 0, 120 # Random values +# return DowI.gen_archive_header(name, *_AB, toc_pos=toc_pos), DowI.gen_archive_header_buffer(name, *_AB), DowI.gen_archive_header_buffer(name, *_AB, magic=bad_magic) +# +# +# DOW1_HEADER, DOW1_HEADER_DATA, DOW1_HEADER_DATA_BAD_MAGIC = fast_dow1_archive_header("Dawn Of War 1 Test Header", 180, b"deadbeef") +# # By not writing Magic/Archive TOC-Pos must be changed in the generated DowIIArchiveHeader; the buffers (should be) identical given the same input +# DOW1_HEADER_INNER, DOW1_HEADER_INNER_DATA, _ = fast_dow1_archive_header("Dawn Of War 1 Test Header (Inner Pack)", 168, b"deaddead") +# DOW1_ARCHIVE_BUFFER = DowI.gen_sample_archive_buffer("Dawn Of War 1 Test Archive", "Tests", "Dow1 Header 
Tests.txt", b"You thought this was a test, but it was me, DIO!") +# +# HDR_START = 12 # Most logic now doesn't handle Magic + Version +# +# +# class TestDowIArchiveHeader(ArchiveHeaderTests): +# @pytest.mark.parametrize( +# ["archive", "cls"], +# [(DOW1_ARCHIVE_BUFFER, v2.ArchiveHeader)]) +# def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): +# super().test_validate_checksums(archive, cls) +# +# @pytest.mark.parametrize( +# ["expected", "inst"], +# [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] +# ) +# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): +# super().test_pack(inst, expected) +# +# @pytest.mark.parametrize( +# ["buffer", "expected"], +# [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] +# ) +# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): +# super().test_unpack(buffer, expected) +# +# @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, v2.version)]) +# def test_version(self, archive: proto.ArchiveHeader, expected: Version): +# super().test_version(archive, expected) +# +# +# # Not garunteed to be a valid header +# +# +# def fast_dow2_archive_header(name): +# _ABC = 0, 0, 0 +# return DowII.gen_archive_header(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC) +# +# +# DOW2_HEADER, DOW2_HEADER_DATA = fast_dow2_archive_header("Dawn Of War 2 Test Header") +# DOW2_ARCHIVE_BUFFER = DowII.gen_sample_archive_buffer("Dawn Of War 2 Test Archive", "Dow2 Tests", "Imperial Propoganda.txt", b"By the Emperor, we're ready to unleash eleven barrels, m' lord, sir!") +# +# +# class TestDowIIArchiveHeader(ArchiveHeaderTests): +# @pytest.mark.parametrize( +# ["buffer", "expected"], +# [(DOW2_HEADER_DATA[HDR_START:], DOW2_HEADER)], +# ) +# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): +# super().test_unpack(buffer, expected) +# +# @pytest.mark.parametrize( +# ["inst", "expected"], +# [(DOW2_HEADER, DOW2_HEADER_DATA[HDR_START:])]) +# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): +# super().test_pack(inst, expected) +# +# @pytest.mark.parametrize( +# ["archive", "cls"], +# [(DOW2_ARCHIVE_BUFFER, v5.ArchiveHeader)], +# ) +# def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]): +# super().test_validate_checksums(archive, cls) +# +# @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, v5.version)]) +# def test_version(self, archive: proto.ArchiveHeader, expected: Version): +# super().test_version(archive, expected) +# +# +# def fast_dow3_archive_header(name, bad_magic: bytes): +# _ABCD = 0, 1, 2, 3 +# return DowIII.gen_archive_header(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD, magic=bad_magic) +# +# +# DOW3_HEADER, DOW3_HEADER_DATA, DOW3_HEADER_DATA_BAD_MAGIC = fast_dow3_archive_header("Dawn Of War 3 Test Header", b" Marine!") # Big Brain Pun in ` Marine!` +# +# +# class TestDowIIIArchiveHeader(ArchiveHeaderTests): +# @pytest.mark.parametrize( +# ["archive", "cls"], +# [(None, v9.ArchiveHeader)]) +# def test_validate_checksums(self, archive: bytes, cls: Type[v9.ArchiveHeader]): +# for fast in TF: +# for _assert in TF: +# # HACK but if it fails it means logic has changed +# assert cls.validate_checksums(None, None, fast=fast, _assert=_assert) +# +# @pytest.mark.parametrize( +# ["buffer", "expected"], +# [(DOW3_HEADER_DATA[HDR_START:], DOW3_HEADER)], +# ) +# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): +# super().test_unpack(buffer, 
expected) +# +# @pytest.mark.parametrize( +# ["inst", "expected"], +# [(DOW3_HEADER, DOW3_HEADER_DATA[HDR_START:])]) +# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): +# super().test_pack(inst, expected) +# +# @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, v9.version)]) +# def test_version(self, archive: proto.ArchiveHeader, expected: Version): +# super().test_version(archive, expected) diff --git a/tests/relic/sga/datagen.py b/tests/relic/sga/datagen.py index 29ac4cb..6f04b41 100644 --- a/tests/relic/sga/datagen.py +++ b/tests/relic/sga/datagen.py @@ -3,7 +3,7 @@ from serialization_tools.ioutil import WindowPtr, Ptr -from relic.sga_old.protocols import ArchiveHeader +from relic.sga.protocols import ArchiveHeader from relic.sga_old.abc_old_ import FileABC, FolderABC, VirtualDriveABC, ArchiveTOC from relic.sga_old import v2, v5, v9 from relic.sga_old.common import ArchiveRange diff --git a/tests/relic/sga/file/test_file_header.py b/tests/relic/sga/file/test_file_header.py index d18ca15..fa60b2a 100644 --- a/tests/relic/sga/file/test_file_header.py +++ b/tests/relic/sga/file/test_file_header.py @@ -1,67 +1,67 @@ -from abc import abstractmethod -from io import BytesIO - -import pytest - -from relic.common import VersionLike -from relic.sga_old.common import ArchiveVersion -from relic.sga_old.abc_old_ import FileHeaderABC -from tests.relic.sga.datagen import DowI, DowII, DowIII - - -class FileHeaderTests: - @abstractmethod - def test_pack(self, header: FileHeaderABC, expected: bytes): - with BytesIO() as stream: - written = header.pack(stream) - assert written == len(expected) - stream.seek(0) - assert stream.read() == expected - - @abstractmethod - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): - with BytesIO(data_stream) as stream: - header = expected.__class__.unpack(stream) - assert header == expected - - -DOW1_HEADER, DOW1_HEADER_BUFFER = DowI.gen_file_header(0, 0, 0), DowI.gen_file_header_buffer(0, 0, 0) - - -class TestDowIFileHeader(FileHeaderTests): - @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - - @pytest.mark.parametrize(["expected", "data_stream"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): - super().test_unpack(data_stream, expected) - - - -DOW2_HEADER, DOW2_HEADER_BUFFER = DowII.gen_file_header(0, 0, 0), DowII.gen_file_header_buffer(0, 0, 0) - - -class TestDowIIFileHeader(FileHeaderTests): - @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - @pytest.mark.parametrize(["expected", "data_stream"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): - super().test_unpack(data_stream, expected) - - - -DOW3_HEADER, DOW3_HEADER_BUFFER = DowIII.gen_file_header(0x0f, 0xf0, 0x09, 0x90), DowIII.gen_file_header_buffer(0x0f, 0xf0, 0x09, 0x90) - - -class TestDowIIIFileHeader(FileHeaderTests): - @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_pack(self, header: FileHeaderABC, expected: bytes): - super().test_pack(header, expected) - - @pytest.mark.parametrize(["expected", "data_stream"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) - def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): - 
super().test_unpack(data_stream, expected) +# from abc import abstractmethod +# from io import BytesIO +# +# import pytest +# +# from relic.common import VersionLike +# from relic.sga_old.common import ArchiveVersion +# from relic.sga_old.abc_old_ import FileHeaderABC +# from tests.relic.sga.datagen import DowI, DowII, DowIII +# +# +# class FileHeaderTests: +# @abstractmethod +# def test_pack(self, header: FileHeaderABC, expected: bytes): +# with BytesIO() as stream: +# written = header.pack(stream) +# assert written == len(expected) +# stream.seek(0) +# assert stream.read() == expected +# +# @abstractmethod +# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): +# with BytesIO(data_stream) as stream: +# header = expected.__class__.unpack(stream) +# assert header == expected +# +# +# DOW1_HEADER, DOW1_HEADER_BUFFER = DowI.gen_file_header(0, 0, 0), DowI.gen_file_header_buffer(0, 0, 0) +# +# +# class TestDowIFileHeader(FileHeaderTests): +# @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) +# def test_pack(self, header: FileHeaderABC, expected: bytes): +# super().test_pack(header, expected) +# +# +# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) +# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): +# super().test_unpack(data_stream, expected) +# +# +# +# DOW2_HEADER, DOW2_HEADER_BUFFER = DowII.gen_file_header(0, 0, 0), DowII.gen_file_header_buffer(0, 0, 0) +# +# +# class TestDowIIFileHeader(FileHeaderTests): +# @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) +# def test_pack(self, header: FileHeaderABC, expected: bytes): +# super().test_pack(header, expected) +# +# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) +# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): +# super().test_unpack(data_stream, expected) +# +# +# +# DOW3_HEADER, DOW3_HEADER_BUFFER = DowIII.gen_file_header(0x0f, 0xf0, 0x09, 0x90), DowIII.gen_file_header_buffer(0x0f, 0xf0, 0x09, 0x90) +# +# +# class TestDowIIIFileHeader(FileHeaderTests): +# @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) +# def test_pack(self, header: FileHeaderABC, expected: bytes): +# super().test_pack(header, expected) +# +# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) +# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): +# super().test_unpack(data_stream, expected) diff --git a/tests/relic/sga/test_sga.py b/tests/relic/sga/test_sga.py index cda3fa5..679a8e9 100644 --- a/tests/relic/sga/test_sga.py +++ b/tests/relic/sga/test_sga.py @@ -1,4 +1,4 @@ -from __future__ import annotations +# from __future__ import annotations # from io import BytesIO # from relic.sga import Archive diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py index 18b5acc..eb1cd56 100644 --- a/tests/relic/sga/test_vX_interface.py +++ b/tests/relic/sga/test_vX_interface.py @@ -1,12 +1,11 @@ from typing import Iterable, List, Tuple -import relic.sga_old -from relic.sga_old import v2, v5,archive,v7_old, v9, vX +from relic.sga import ov2, v5, v7, v9, vX, apis import pytest -MODULES = [v2, v5,v7,v7_old, v9] +MODULES = [v2, v5,v7, v9] ATTRS = vX.required_attrs -APIS = relic.sga.APIS.values() +APIS = io.APIS.values() def _permutate(*items: List): From bb0a6b62fdf23f74e4f36f82d5b46abb2c2c9680 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: 
Sat, 11 Jun 2022 22:56:40 -0800
Subject: [PATCH 10/19] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f71c63d..55dd7a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ dist/*
 # Ignore debug-dev dumped Binary files (Files saved to cwd for fast examination)
 src/scripts/*.bin
 ignore/texconv.exe
+tests/relic/sga/archive/file_sources.json

From b180cf021476f0b67edb4d8f55db8a6151398167 Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Sun, 12 Jun 2022 01:04:14 -0800
Subject: [PATCH 11/19] New API, No CI Tests

A lot of code is 'rewritten' instead of shared between versions, but I
consider this a 'feature' for the most part. I'm sure I could hack
something together, but this looks much cleaner than my previous two
rewrites.

Unfortunately, unlike the previous rewrites, the tests were extremely
broken, as 'headers' are replaced by metadata or ignored (due to being
redundant). Rather than fix them in this commit, I added the ability to
test against file sources on the local machine; in other words, I can
test against REAL data. Unless I build my own dataset, though, this
won't be used in CI, to avoid distributing Relic's assets.

---
 src/relic/sga/__init__.py | 19 +-
 src/relic/sga/_abc.py | 5 +-
 src/relic/sga/_apis.py | 7 +
 src/relic/sga/_core.py | 61 +++
 src/relic/sga/_serializers.py | 18 +-
 src/relic/sga/apis.py | 19 -
 src/relic/sga/core.py | 628 ------------------------
 src/relic/sga/error.py | 51 ++
 src/relic/sga/ov2.py | 121 -----
 src/relic/sga/protocols.py | 17 +-
 src/relic/sga/v2/_serializers.py | 8 +-
 src/relic/sga/v2/core.py | 2 +-
 src/relic/sga/v5.py | 149 ------
 src/relic/sga/v5/_serializers.py | 10 +-
 src/relic/sga/v5/core.py | 6 +-
 src/relic/sga/v7.py | 124 -----
 src/relic/sga/v7/_serializers.py | 17 +-
 src/relic/sga/v7/core.py | 7 +-
 src/relic/sga/v9.py | 116 -----
 src/relic/sga/v9/__init__.py | 22 +
 src/relic/sga/v9/_serializers.py | 97 ++++
 src/relic/sga/v9/core.py | 42 ++
 src/relic/sga/vX.py | 36 --
 src/relic/sga/writer_tester.py | 36 --
 src/scripts/universal/sga/common.py | 2 +-
 src/scripts/universal/sga/unpack.py | 2 +-
 tests/relic/sga/archive/test_archive.py | 260 ++++++----
 tests/relic/sga/datagen.py | 324 +++++++-----
 tests/relic/sga/test_vX_interface.py | 82 ++--
 29 files changed, 732 insertions(+), 1556 deletions(-)
 create mode 100644 src/relic/sga/_apis.py
 create mode 100644 src/relic/sga/_core.py
 delete mode 100644 src/relic/sga/apis.py
 delete mode 100644 src/relic/sga/core.py
 create mode 100644 src/relic/sga/error.py
 delete mode 100644 src/relic/sga/ov2.py
 delete mode 100644 src/relic/sga/v5.py
 delete mode 100644 src/relic/sga/v7.py
 delete mode 100644 src/relic/sga/v9.py
 create mode 100644 src/relic/sga/v9/__init__.py
 create mode 100644 src/relic/sga/v9/_serializers.py
 create mode 100644 src/relic/sga/v9/core.py
 delete mode 100644 src/relic/sga/vX.py
 delete mode 100644 src/relic/sga/writer_tester.py

diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py
index 2423fda..95bd115 100644
--- a/src/relic/sga/__init__.py
+++ b/src/relic/sga/__init__.py
@@ -1,17 +1,10 @@
-from typing import List, Dict
+from __future__ import annotations
 
-from relic.sga import protocols, v2, v5, v7
-from relic.sga.core import Version
-
-_APIS: List[protocols.API] = [v2.API, v5.API, v7.API]
-apis: Dict[Version, protocols.API] = {api.version: api for api in _APIS}
+from relic.sga._apis import apis as APIs
+from relic.sga._core import Version, MagicWord
 
 __all__ = [
-    "v2",
-    "v5",
-    "v7",
-    "v9",
-    "protocols",
-    "
"core", - "apis" + "APIs", + "Version", + "MagicWord" ] diff --git a/src/relic/sga/_abc.py b/src/relic/sga/_abc.py index 52d28e3..4f7d17d 100644 --- a/src/relic/sga/_abc.py +++ b/src/relic/sga/_abc.py @@ -9,8 +9,9 @@ from typing import List, Optional, Tuple, BinaryIO, Type, Generic from relic.sga import protocols as p -from relic.sga.protocols import TFileMetadata, StorageType, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer -from relic.sga.core import Version +from relic.sga.protocols import TFileMetadata, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer +from relic.sga._core import StorageType +from relic.sga.error import Version def _build_io_path(name: str, parent: Optional[p.IONode]) -> PurePath: diff --git a/src/relic/sga/_apis.py b/src/relic/sga/_apis.py new file mode 100644 index 0000000..8b648a9 --- /dev/null +++ b/src/relic/sga/_apis.py @@ -0,0 +1,7 @@ +from typing import List, Dict + +from relic.sga import v2, v5, v7, v9, protocols +from relic.sga._core import Version + +_APIS: List[protocols.API] = [v2.API, v5.API, v7.API, v9.API] +apis: Dict[Version, protocols.API] = {api.version: api for api in _APIS} diff --git a/src/relic/sga/_core.py b/src/relic/sga/_core.py new file mode 100644 index 0000000..4e78667 --- /dev/null +++ b/src/relic/sga/_core.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from serialization_tools.structx import Struct +from typing import Optional, ClassVar, BinaryIO + +from serialization_tools.magic import MagicWordIO + + +MagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) + + +@dataclass +class Version: + """ The Major Version; Relic refers to this as the 'Version' """ + major: int + """ The Minor Version; Relic refers to this as the 'Product' """ + minor: Optional[int] = 0 + + LAYOUT: ClassVar[Struct] = Struct("<2H") + + def __str__(self) -> str: + return f"Version {self.major}.{self.minor}" + + def __eq__(self, other): + if isinstance(other, Version): + return self.major == other.major and self.minor == other.minor + else: + return super().__eq__(other) + + def __hash__(self): + # Realistically; Version will always be <256 + # But we could manually set it to something much bigger by accident; and that may cause collisions + return self.major << (self.LAYOUT.size // 2) + self.minor + + @classmethod + def unpack(cls, stream: BinaryIO): + layout: Struct = cls.LAYOUT + args = layout.unpack_stream(stream) + return cls(*args) + + def pack(self, stream: BinaryIO): + layout: Struct = self.LAYOUT + args = (self.major, self.minor) + return layout.pack_stream(stream, *args) + + + +class StorageType(int, Enum): + Store = 0 + BufferCompress = 1 + StreamCompress = 2 + + +class VerificationType(int, Enum): + None_ = 0 # unknown real values, assuming incremental + CRC = 1 # unknown real values, assuming incremental + CRCBlocks = 2 # unknown real values, assuming incremental + MD5Blocks = 3 # unknown real values, assuming incremental + SHA1Blocks = 4 # unknown real values, assuming incremental diff --git a/src/relic/sga/_serializers.py b/src/relic/sga/_serializers.py index d6e825a..6e52d04 100644 --- a/src/relic/sga/_serializers.py +++ b/src/relic/sga/_serializers.py @@ -23,7 +23,7 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO) -> TocHeader: - drive_pos, drive_count, folder_pos, folder_count, file_pos, file_count, name_pos, name_count = self.layout.unpack(stream) + drive_pos, 
drive_count, folder_pos, folder_count, file_pos, file_count, name_pos, name_count = self.layout.unpack_stream(stream) return TocHeader((drive_pos, drive_count), (folder_pos, folder_count), (file_pos, file_count), (name_pos, name_count)) def pack(self, stream: BinaryIO, value: TocHeader) -> int: @@ -57,7 +57,7 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO) -> FolderDef: - name_pos, folder_start, folder_end, file_start, file_end, root_folder = self.layout.unpack_stream(stream) + name_pos, folder_start, folder_end, file_start, file_end = self.layout.unpack_stream(stream) folder_range = (folder_start, folder_end) file_range = (file_start, file_end) return FolderDef(name_pos=name_pos, folder_range=folder_range, file_range=file_range) @@ -94,8 +94,8 @@ def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderD for folder in folders: _apply_self_as_parent(folder) - - drive_folder = folders[drive_def.root_folder] + root_folder = drive_def.root_folder - drive_def.folder_range[0] # make root folder relative to our folder slice + drive_folder = folders[root_folder] drive = _abc.Drive(drive_def.alias, drive_def.name, drive_folder.sub_folders, drive_folder.files) _apply_self_as_parent(drive) all_files.extend(files) @@ -117,8 +117,8 @@ def _unpack_helper(stream: BinaryIO, toc_info: Tuple[int, int], header_pos: int, def _read_toc_definitions(stream: BinaryIO, toc: TocHeader, header_pos: int, drive_serializer: StreamSerializer[DriveDef], folder_serializer: StreamSerializer[FolderDef], file_serializer: StreamSerializer[FileDefABC]): drives = _unpack_helper(stream, toc.drive_info, header_pos, drive_serializer) - folders = _unpack_helper(stream, toc.drive_info, header_pos, folder_serializer) - files = _unpack_helper(stream, toc.drive_info, header_pos, file_serializer) + folders = _unpack_helper(stream, toc.folder_info, header_pos, folder_serializer) + files = _unpack_helper(stream, toc.file_info, header_pos, file_serializer) return drives, folders, files @@ -128,7 +128,7 @@ def _read_toc_names_as_count(stream: BinaryIO, toc_info: Tuple[int, int], header names: Dict[int, str] = {} running_buffer = bytearray() offset = 0 - while len(names) < toc_info[0]: + while len(names) < toc_info[1]: buffer = stream.read(buffer_size) if len(buffer) == 0: raise Exception("Ran out of data!") # TODO, proper exception @@ -146,7 +146,7 @@ def _read_toc_names_as_count(stream: BinaryIO, toc_info: Tuple[int, int], header offset += len(buffer) continue - remaining = toc_info[0] - len(names) + remaining = toc_info[1] - len(names) available = min(len(parts), remaining) for _ in range(available): name = parts[_] @@ -158,7 +158,7 @@ def _read_toc_names_as_count(stream: BinaryIO, toc_info: Tuple[int, int], header def _read_toc_names_as_size(stream: BinaryIO, toc_info: Tuple[int, int], header_pos: int) -> Dict[int, str]: stream.seek(header_pos + toc_info[0]) name_buffer = stream.read(toc_info[1]) - parts = name_buffer.split(b"") + parts = name_buffer.split(b"\0") names: Dict[int, str] = {} offset = 0 for part in parts: diff --git a/src/relic/sga/apis.py b/src/relic/sga/apis.py deleted file mode 100644 index fbe1302..0000000 --- a/src/relic/sga/apis.py +++ /dev/null @@ -1,19 +0,0 @@ -# from typing import Dict, BinaryIO -# -# from relic.sga import ov2, v5, v7, v9, vX -# from relic.sga.core import Version, ArchiveABC, VersionNotSupportedError -# -# __APIS = [ov2.API, v5.API, v7.API, v9.API] -# APIS: Dict[Version, vX.APIvX] = {api.version: api for api in __APIS} -# -# -# 
def read_archive(stream: BinaryIO, sparse: bool = False, apis: Dict[Version, vX.APIvX] = None) -> ArchiveABC: -# apis = APIS if apis is None else apis -# ArchiveABC.MAGIC.read_magic_word(stream) -# version = Version.unpack(stream) -# try: -# api = apis[version] -# except KeyError: -# raise VersionNotSupportedError(version,list(apis.keys())) -# version.assert_version_matches(api.version) -# return api.Archive._read(stream, sparse) diff --git a/src/relic/sga/core.py b/src/relic/sga/core.py deleted file mode 100644 index aa882c7..0000000 --- a/src/relic/sga/core.py +++ /dev/null @@ -1,628 +0,0 @@ -from __future__ import annotations - -import zlib -from abc import abstractmethod -from dataclasses import dataclass -from enum import Enum -from io import BytesIO -from pathlib import PurePath, PureWindowsPath -from typing import ClassVar, BinaryIO, Optional, List, Type, Dict, Tuple, Any, Protocol, Iterable, Union, Generic, TypeVar - -from serialization_tools.ioutil import WindowPtr -from serialization_tools.magic import MagicWordIO -from serialization_tools.structx import Struct - -MagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii")) -# -# T = TypeVar("T") -# class StreamSerializer(Generic[T], Protocol): -# def unpack(self, stream:BinaryIO) -> T: -# raise NotImplementedError -# def pack(self, stream:BinaryIO, value:T) -> int: -# raise NotImplementedError -# -# # Dont use dataclass -# class ArchivePathable(Protocol): -# _parent_path: Optional[ArchivePathable] -# -# @property -# def path(self) -> PurePath: -# raise NotImplementedError -# -# -# class ArchiveWalkable(Protocol): -# def walk(self) -> ArchiveWalk: -# raise NotImplementedError -# -# -# class FileVerificationType(Enum): -# None_ = 0 # unknown real values, assuming incremental -# CRC = 1 # unknown real values, assuming incremental -# CRCBlocks = 2 # unknown real values, assuming incremental -# MD5Blocks = 3 # unknown real values, assuming incremental -# SHA1Blocks = 4 # unknown real values, assuming incremental -# -# -# class FileStorageType(Enum): -# Store = 0 -# StreamCompress = 1 # 16 in v2 (old-engine binding) -# BufferCompress = 2 # 32 in v2 (old-engine binding) - - -@dataclass -class Version: - """ The Major Version; Relic refers to this as the 'Version' """ - major: int - """ The Minor Version; Relic refers to this as the 'Product' """ - minor: Optional[int] = 0 - - LAYOUT: ClassVar[Struct] = Struct("<2H") - - def __str__(self) -> str: - return f"Version {self.major}.{self.minor}" - - def __eq__(self, other): - if isinstance(other, Version): - return self.major == other.major and self.minor == other.minor - else: - return super().__eq__(other) - - def __hash__(self): - # Realistically; Version will always be <256 - # But we could manually set it to something much bigger by accident; and that may cause collisions - return self.major << (self.LAYOUT.size // 2) + self.minor - - @classmethod - def unpack(cls, stream: BinaryIO): - layout: Struct = cls.LAYOUT - args = layout.unpack_stream(stream) - return cls(*args) - - def pack(self, stream: BinaryIO): - layout: Struct = self.LAYOUT - args = (self.major, self.minor) - return layout.pack_stream(stream, *args) - - def assert_version_matches(self, expected: Version): - if self != expected: - raise VersionMismatchError(self, expected) - - -def _print_mismatch(name: str, received, expected): - msg = f"Unexpected {name}" - if received or expected: - msg += ";" - if received: - msg += f" got `{str(received)}`" - if received and expected: - msg += "," - if expected: - msg += f" 
expected `{str(expected)}`" - return msg + "!" - - -class MismatchError(Exception): - def __init__(self, name: str, received: Any = None, expected: Any = None): - self.name = name - self.received = received - self.expected = expected - - def __str__(self): - return _print_mismatch(self.name, self.received, self.expected) - - -class VersionMismatchError(MismatchError): - def __init__(self, received: Version = None, expected: Version = None): - super().__init__("Version", received, expected) - - -class MD5MismatchError(MismatchError): - def __init__(self, received: bytes = None, expected: bytes = None): - super().__init__("MD5", received, expected) - - -class VersionNotSupportedError(Exception): - def __init__(self, received: Version, allowed: List[Version]): - self.received = received - self.allowed = allowed - - def __str__(self): - def str_ver(v: Version) -> str: # dont use str(version); too verbose - return f"{v.major}.{v.minor}" - - allowed_str = [str_ver(_) for _ in self.allowed] - return f"Version `{str_ver(self.received)}` is not supported. Versions supported: `{allowed_str}`" - - -# def _read_names_as_lookup(stream: BinaryIO, name_count_or_size: int, is_count: bool = True): -# BUF_SIZE = 64 # stolen from archive reader -# lookup = {} -# offset = 0 -# if not is_count: -# buffer = stream.read(name_count_or_size) # size -# names = [_.decode("ascii") for _ in buffer.split(b"\0")] -# for name in names: -# lookup[offset] = name -# offset += len(name) + 1 -# return lookup -# else: -# # THIS GETS COMPLICATED -# start_pos = stream.tell() -# current_name = b"" -# # While we still need to reaad names -# while len(lookup) < name_count_or_size: -# # Read a partial buffer in -# buffer = stream.read(BUF_SIZE) -# if len(buffer) == 0: -# raise Exception("Buffer ran out of data!") -# # Try to do a fast separate on the null byte -# enc_names = buffer.split(b"\0") -# current_name += enc_names[0] -# # Needs more data (no b"\0" was found) -# if len(enc_names) == 1 and len(buffer) == BUF_SIZE: -# continue -# else: -# # Handle [0] -# lookup[offset] = current_name.decode("ascii") -# offset += len(current_name) + 1 -# current_name = b"" -# # Handle [1,N] by seeking to offset and looping again -# stream.seek(start_pos + offset) -# continue -# return lookup -# -# -# @dataclass -# class BlobPtrs: -# header_pos: int -# header_size: Optional[int] -# data_pos: int -# data_size: Optional[int] -# -# -# @dataclass -# class ToCPtrsABC: -# vdrive_rel_pos: int -# vdrive_count: int -# folder_rel_pos: int -# folder_count: int -# file_rel_pos: int -# file_count: int -# name_rel_pos: int -# name_count_or_size: int # meaning varies between version -# -# LAYOUT: ClassVar[Struct] -# """ Only 'counts' are uint16s """ -# LAYOUT_UINT16: ClassVar = Struct(" PurePath: -# return PurePath(f"{self.alias}:/") -# -# def walk(self) -> ArchiveWalk: -# yield self, self.folders, self.files -# for folder in self.folders: -# for _, local_folder, sub_folders, files in folder.walk(): -# yield self, local_folder, sub_folders, files -# -# -# @dataclass -# class FolderDefABC: -# name_rel_pos: int -# folder_start: int -# folder_end: int -# file_start: int -# file_end: int -# -# LAYOUT: ClassVar[Struct] -# LAYOUT_UINT32: ClassVar = Struct(" PurePath: -# if self._parent_path: -# return self._parent_path.path / self.name -# else: -# return PurePath(self.name) -# -# def walk(self) -> ArchiveWalk: -# yield self, self.folders, self.files -# for folder in self.folders: -# for _ in folder.walk(): -# yield _ -# -# -# @dataclass -# class FileSparseInfo: -# 
storage_type: FileStorageType # Redundancy -# abs_data_pos: int # Absolute data position -# size_on_disk: int -# size_in_archive: int -# -# def read(self, stream: BinaryIO) -> bytes: -# if self.size_in_archive == 0: -# return b"" -# else: -# with WindowPtr(self.abs_data_pos, self.size_in_archive).stream_jump_to(stream) as window: -# file_data = window.read() -# if self.storage_type == FileStorageType.Store: -# return file_data -# elif self.storage_type in [FileStorageType.StreamCompress, FileStorageType.BufferCompress]: -# return zlib.decompress(file_data) -# else: -# raise NotImplementedError(f"Reading a file stored as `{self.storage_type}` is not supported!") -# -# -# @dataclass -# class FileMetaABC: -# storage: FileStorageType -# -# -# @dataclass -# class FileABC(ArchivePathable): -# name: str -# meta: FileMetaABC -# data: Optional[bytes] = None -# sparse_info: Optional[FileSparseInfo] = None -# _parent_path: Optional[ArchivePathable] = None -# -# @property -# def path(self) -> PurePath: -# if self._parent_path: -# return self._parent_path.path / self.name -# else: -# return PurePath(self.name) -# -# def read_data(self, stream: BinaryIO): -# self.data = self.sparse_info.read(stream) -# -# -# class ArchiveMetaABC: -# ... # TODO -# -# -# class ArchiveFlattener: -# # FILE_DEF_CLS: Type[FileDefABC] = FileDefABC -# FOLDER_DEF_CLS: Type[FolderDefABC] = FolderDefABC -# DRIVE_DEF_CLS: Type[DriveDefABC] = DriveDefABC -# -# def __init__(self, name_stream: BinaryIO, data_stream: BinaryIO, drive_def_cls: Optional[Type[DriveDefABC]] = None, folder_def_cls: Optional[Type[FolderDefABC]] = None): -# if drive_def_cls is not None: -# self.DRIVE_DEF_CLS = drive_def_cls -# if folder_def_cls is not None: -# self.FOLDER_DEF_CLS = folder_def_cls -# -# self.files: List[FileDefABC] = [] -# self.folders: List[FolderDefABC] = [] -# self.drives: List[DriveDefABC] = [] -# self.name_stream: BinaryIO = name_stream -# self._name_stream_offset: int = 0 -# self._data_stream_offset: int = 0 -# self.data_stream: BinaryIO = data_stream -# self._name_lookup: Dict[str, int] = {} -# -# def get_name_rel_pos(self, name: str) -> int: -# if name in self._name_lookup: -# return self._name_lookup[name] -# else: -# this_name_offset = self._name_lookup[name] = self._name_stream_offset -# self._name_stream_offset += self.name_stream.write(name.encode("ascii") + b"\0") -# return this_name_offset -# -# def get_name_rel_pos_from_path(self, pathable: ArchivePathable, root: DriveABC) -> int: -# path = pathable.path -# root_path = root.path -# rel_path = path.relative_to(root_path) -# name = str(rel_path) -# if name == "." 
and root_path == path: -# name = "" -# return self.get_name_rel_pos(name) -# -# @staticmethod -# def repackage_data(data: bytes, storage: FileStorageType) -> Tuple[bytes, int, int]: -# if storage == storage.Store: -# return data, len(data), len(data) -# else: -# comp_data = zlib.compress(data) -# return comp_data, len(data), len(comp_data) -# -# def get_data_rel_pos(self, data: bytes) -> int: -# offset = self._data_stream_offset -# self.data_stream.write(data) -# return offset -# -# @abstractmethod -# def build_file_def(self, file: FileABC, name_rel_pos: int, data_rel_pos: int, length: int, store_length: int, storage: FileStorageType) -> FileDefABC: -# raise NotImplementedError -# # return FileDefABC(name_rel_pos, data_rel_pos, length, store_length, storage) -# -# def flatten_file(self, file: FileABC): -# name_rel_pos = self.get_name_rel_pos(file.name) # files use name-only -# data_buffer, length, store_length = self.repackage_data(file.data, file.meta.storage) -# data_rel_pos = self.get_data_rel_pos(data_buffer) -# file_def = self.build_file_def(file, name_rel_pos, data_rel_pos, length, store_length, file.meta.storage) # FileDefABC(name_rel_pos,data_rel_pos,length,store_length,file.meta.storage) -# return file_def -# -# def flatten_folder(self, folder: FolderABC, root: DriveABC): -# folder_def_cls:Type[FolderDefABC] = self.FOLDER_DEF_CLS -# name_rel_pos = self.get_name_rel_pos_from_path(folder, root) -# folder_start = len(self.folders) -# file_start = len(self.files) -# folder_def = folder_def_cls(name_rel_pos, folder_start, folder_start + len(folder.folders), file_start, file_start + len(folder.files)) -# -# self.folders.extend([None] * len(folder.folders)) # Reserve space for sub-folders -# self.files.extend([None] * len(folder.files)) # Reserve space for subfiles -# -# for i, sub_folder in enumerate(folder.folders): -# self.folders[folder_start + i] = self.flatten_folder(sub_folder, root) -# for i, file in enumerate(folder.files): -# self.files[file_start + i] = self.flatten_file(file) -# return folder_def -# -# def flatten_drive(self, drive: DriveABC) -> DriveDefABC: -# drive_def_cls: Type[DriveDefABC] = self.DRIVE_DEF_CLS -# drive_folder_root = len(self.folders) -# drive_file_start = len(self.files) -# drive_def = drive_def_cls(drive.alias, drive.name, drive_folder_root, None, drive_file_start, None, drive_folder_root) -# -# self.folders.extend([None]) # Reserve space for root -# -# self.folders[drive_folder_root] = self.flatten_folder(drive, drive) # drive is technically a folder; but this should be fixed for better type-safety -# -# drive_def.folder_end = len(self.folders) -# drive_def.file_end = len(self.files) -# return drive_def -# -# def flatten_archive(self, archive: ArchiveABC): -# for drive in archive.drives: -# drive_def = self.flatten_drive(drive) -# self.drives.append(drive_def) -# -# @dataclass -# class ArchiveABC(ArchiveWalkable): -# MAGIC: ClassVar = MagicWord -# VERSION: ClassVar[Version] -# name: str -# meta: ArchiveMetaABC -# drives: List[DriveABC] -# -# # header_size: int # Not required -# # data_offset: int # Not required -# -# # header_offset: int -# -# TOC_PTRS: ClassVar[Type[ToCPtrsABC]] -# VDRIVE_DEF: ClassVar[Type[DriveDefABC]] -# FOLDER_DEF: ClassVar[Type[FolderDefABC]] -# FILE_DEF: ClassVar[Type[FileDefABC]] -# NAME_BUFFER_USES_COUNT: ClassVar[bool] = True -# -# @classmethod -# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: -# raise NotImplementedError -# -# def _pack_meta(self, stream:BinaryIO, ): -# raise 
NotImplementedError -# -# @classmethod -# def _read_toc(cls, header_stream: BinaryIO, header_pos: int, toc_ptrs: ToCPtrsABC): -# vdrive_stream = header_stream -# vdrive_stream.seek(header_pos + toc_ptrs.vdrive_rel_pos) -# vdrive_defs = [cls.VDRIVE_DEF.unpack(vdrive_stream) for _ in range(toc_ptrs.vdrive_count)] -# -# folders_stream = header_stream -# folders_stream.seek(header_pos + toc_ptrs.folder_rel_pos) -# folder_defs = [cls.FOLDER_DEF.unpack(folders_stream) for _ in range(toc_ptrs.folder_count)] -# -# files_stream = header_stream -# files_stream.seek(header_pos + toc_ptrs.file_rel_pos) -# file_defs = [cls.FILE_DEF.unpack(files_stream) for _ in range(toc_ptrs.file_count)] -# -# name_stream = header_stream -# name_stream.seek(header_pos + toc_ptrs.name_rel_pos) -# names = _read_names_as_lookup(name_stream, toc_ptrs.name_count_or_size, is_count=cls.NAME_BUFFER_USES_COUNT) -# -# return vdrive_defs, folder_defs, file_defs, names -# -# @classmethod -# def _write_toc(cls, header_stream:BinaryIO, drives:List[DriveDefABC], folders:List[FolderDefABC], files:List[FileDefABC], name_buffer:bytes, name_count_or_size:int) -> ToCPtrsABC: -# # The order shouldn't matter; but I follow the generally used format (that I've seen) of drive/folder/file/names -# drive_rel_pos, drive_count = header_stream.tell(), len(drives) -# for drive in drives: -# drive.pack(header_stream) -# -# folder_rel_pos, folder_count = header_stream.tell(), len(folders) -# for folder in folders: -# folder.pack(header_stream) -# -# file_rel_pos, file_count = header_stream.tell(), len(files) -# for file in files: -# file.pack(header_stream) -# -# name_rel_pos, name_count = header_stream.tell(), name_count_or_size -# header_stream.write(name_buffer) -# return cls.TOC_PTRS(drive_rel_pos,drive_count,folder_rel_pos,folder_count,file_rel_pos,file_count,name_rel_pos,name_count) -# -# @classmethod -# def _assemble_files(cls, file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): -# raise NotImplementedError -# -# @classmethod -# def _assemble_folders(cls, folder_defs: List[FolderDefABC], files: List[FileABC], names: Dict[int, str]): -# folders: List[FolderABC] = [] -# for f_def in folder_defs: -# full_name = names[f_def.name_rel_pos] -# if full_name != "": -# name = str(PureWindowsPath(full_name).parts[-1]) # hack to get last portion of pathed-name -# else: -# name = "" -# folder = FolderABC(name, None, files[f_def.file_start:f_def.file_end + 1], _flat_name=full_name) -# folders.append(folder) -# -# for file in folder.files: # Link files to parent -# file._parent_path = folder -# -# for f_def, folder in zip(folder_defs, folders): -# folder.folders = folders[f_def.folder_start:f_def.folder_end + 1] -# -# for subfolder in folder.folders: # Link folders to parent -# subfolder._parent_path = folder -# -# return folders -# -# @classmethod -# def _assemble_drives(cls, drive_defs: List[DriveDefABC], folders: List[FolderABC]): -# drives: List[DriveABC] = [] -# for d_def in drive_defs: -# folder = folders[d_def.folder_root] -# drive = DriveABC(folder.folders, folder.files, d_def.alias, d_def.name) -# drives.append(drive) -# -# # Relink folders/files to drive (instead of folder) -# for file in drive.files: -# file._parent_path = drive -# for folder in drive.folders: -# folder._parent_path = drive -# -# return drives -# -# @classmethod -# def _assemble_hierarchy(cls, vdrive_defs: List[DriveDefABC], folder_defs: List[FolderDefABC], file_defs: List[FileDefABC], names: Dict[int, str], data_pos: int): -# files = cls._assemble_files(file_defs, 
names, data_pos) -# folders = cls._assemble_folders(folder_defs, files, names) -# vdrives = cls._assemble_drives(vdrive_defs, folders) -# return vdrives, folders, files -# -# @classmethod -# def _read(cls, stream: BinaryIO, sparse: bool = False): -# name, meta, blob_ptrs, toc_ptrs = cls._unpack_meta(stream) -# -# # TOC Block -# vdrive_defs, folder_defs, file_defs, names = cls._read_toc(stream, blob_ptrs.header_pos, toc_ptrs) -# -# vdrives, _, files = cls._assemble_hierarchy(vdrive_defs, folder_defs, file_defs, names, blob_ptrs.data_pos) -# -# if not sparse: -# for file in files: -# file.read_data(stream) -# -# return cls(name, meta, vdrives) -# -# def _write_parts(self,out_stream:BinaryIO,): -# -# def _write(self, stream: BinaryIO) -> int: -# with BytesIO() as data_stream: -# with BytesIO() as name_stream: -# flattener = ArchiveFlattener(name_stream,data_stream,drive_def_cls=self.VDRIVE_DEF,folder_def_cls=self.FOLDER_DEF) -# flattener.flatten_archive(self) -# name_stream.seek(0) -# name_buffer = name_stream.read() -# with BytesIO() as header_stream: -# name_count_or_size = len(flattener._name_lookup) if self.NAME_BUFFER_USES_COUNT else len(name_buffer) -# toc = self._write_toc(header_stream,flattener.drives,flattener.folders,flattener.files,name_buffer,name_count_or_size) -# -# with BytesIO() as meta_stream: -# raise NotImplementedError -# -# @classmethod -# def read(cls, stream: BinaryIO, sparse: bool = False): -# magic: MagicWordIO = cls.MAGIC -# magic.read_magic_word(stream) -# archive_version = Version.unpack(stream) -# archive_version.assert_version_matches(cls.VERSION) -# return cls._read(stream, sparse) -# -# def write(self, stream: BinaryIO) -> int: -# magic: MagicWordIO = self.MAGIC -# version: Version = self.VERSION -# written = 0 -# written += magic.write_magic_word(stream) -# written += version.pack(stream) -# written += self._write(stream) -# return written -# -# def walk(self) -> ArchiveWalk: -# for drive in self.drives: -# for _ in drive.walk(): -# yield _ -# -# -# -# ArchiveWalk = Tuple[Union[DriveABC, FolderABC], Iterable[FolderABC], Iterable[FileABC]] diff --git a/src/relic/sga/error.py b/src/relic/sga/error.py new file mode 100644 index 0000000..afd833b --- /dev/null +++ b/src/relic/sga/error.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import List, Any + +from relic.sga._core import Version + + +def _print_mismatch(name: str, received, expected): + msg = f"Unexpected {name}" + if received or expected: + msg += ";" + if received: + msg += f" got `{str(received)}`" + if received and expected: + msg += "," + if expected: + msg += f" expected `{str(expected)}`" + return msg + "!" 
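+# Example of the resulting message (sketch only; assumes Version from
+# _core, whose __str__ renders "Version <major>.<minor>"):
+#   _print_mismatch("Version", Version(2), Version(5))
+#   -> "Unexpected Version; got `Version 2.0`, expected `Version 5.0`!"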
+ + +class MismatchError(Exception): + def __init__(self, name: str, received: Any = None, expected: Any = None): + self.name = name + self.received = received + self.expected = expected + + def __str__(self): + return _print_mismatch(self.name, self.received, self.expected) + + +class VersionMismatchError(MismatchError): + def __init__(self, received: Version = None, expected: Version = None): + super().__init__("Version", received, expected) + + +class MD5MismatchError(MismatchError): + def __init__(self, received: bytes = None, expected: bytes = None): + super().__init__("MD5", received, expected) + + +class VersionNotSupportedError(Exception): + def __init__(self, received: Version, allowed: List[Version]): + self.received = received + self.allowed = allowed + + def __str__(self): + def str_ver(v: Version) -> str: # dont use str(version); too verbose + return f"{v.major}.{v.minor}" + + allowed_str = [str_ver(_) for _ in self.allowed] + return f"Version `{str_ver(self.received)}` is not supported. Versions supported: `{allowed_str}`" diff --git a/src/relic/sga/ov2.py b/src/relic/sga/ov2.py deleted file mode 100644 index 5cfd42f..0000000 --- a/src/relic/sga/ov2.py +++ /dev/null @@ -1,121 +0,0 @@ -# from __future__ import annotations -# -# import hashlib -# from dataclasses import dataclass -# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -# -# from serialization_tools.size import KiB -# from serialization_tools.structx import Struct -# -# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, MD5MismatchError -# from relic.sga.vX import APIvX -# -# -# class _ToCPtrs(ToCPtrsABC): -# LAYOUT = ToCPtrsABC.LAYOUT_UINT16 -# -# -# class _DriveDef(DriveDefABC): -# LAYOUT = DriveDefABC.LAYOUT_UINT16 -# -# -# class _FolderDef(FolderDefABC): -# LAYOUT = FolderDefABC.LAYOUT_UINT16 -# -# -# version = Version(2) -# -# -# @dataclass -# class FileDef(FileDefABC): -# LAYOUT = Struct("<5I") -# -# @classmethod -# def unpack(cls, stream: BinaryIO): -# name_rel_pos, storage_type_val_v2, data_rel_pos, length, store_length = cls.LAYOUT.unpack_stream(stream) -# storage_type_map = {0: FileStorageType.Store, 16: FileStorageType.StreamCompress, 32: FileStorageType.BufferCompress} -# storage_type = storage_type_map[storage_type_val_v2] -# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type) -# -# -# FileMeta = FileMetaABC -# File = FileABC -# Folder = FolderABC -# Drive = DriveABC -# -# -# @dataclass -# class ArchiveMeta(ArchiveMetaABC): -# file_md5: bytes -# header_md5: bytes -# blob_ptr: BlobPtrs # Cached for MD5 -# FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" -# HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" -# -# @staticmethod -# def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): -# _BUF_SIZE = 256 * KiB -# hasher = hashlib.md5(eigen) -# stream.seek(start) -# if size is None: -# while True: -# buffer = stream.read(_BUF_SIZE) -# hasher.update(buffer) -# if len(buffer) != _BUF_SIZE: -# break -# else: -# read = 0 -# while read < size: -# buffer = stream.read(min(_BUF_SIZE, size - read)) -# read += len(buffer) -# hasher.update(buffer) -# md5 = bytes.fromhex(hasher.hexdigest()) -# if md5 != expected: -# raise MD5MismatchError(md5, expected) -# -# def validate_file_md5(self, stream: BinaryIO): -# self._validate_md5(stream, 
self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) -# -# def validate_header_md5(self, stream: BinaryIO): -# self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) -# -# -# class Archive(ArchiveABC): -# meta: ArchiveMeta -# # drives: List[Drive] # typing -# -# TOC_PTRS = _ToCPtrs -# VDRIVE_DEF = _DriveDef -# FOLDER_DEF = _FolderDef -# FILE_DEF = FileDef -# VERSION = version -# META_PREFIX_LAYOUT = Struct("<16s 128s 16s 2I") -# -# @classmethod -# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): -# files = [] -# for f_def in file_defs: -# meta = FileMeta(f_def.storage_type) -# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) -# file = File(names[f_def.name_rel_pos], meta, None, sparse) -# files.append(file) -# return files -# -# @classmethod -# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: -# encoded_name: bytes -# file_md5, encoded_name, header_md5, header_size, data_pos = cls.META_PREFIX_LAYOUT.unpack_stream(stream) -# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") -# header_pos = stream.tell() -# toc_ptrs = cls.TOC_PTRS.unpack(stream) -# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) -# meta = ArchiveMeta(file_md5, header_md5, blob_ptrs) -# return decoded_name, meta, blob_ptrs, toc_ptrs -# -# -# class API(APIvX): -# version = version -# Archive = Archive -# File = File -# Folder = Folder -# Drive = Drive diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py index 2ef0a8c..2910814 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga/protocols.py @@ -1,13 +1,12 @@ from __future__ import annotations from contextlib import contextmanager -from dataclasses import dataclass -from enum import Enum from pathlib import PurePath from types import ModuleType from typing import TypeVar, Protocol, List, Optional, ForwardRef, Tuple, Iterable, BinaryIO, Type, runtime_checkable from relic.common import Version +from relic.sga._core import StorageType FileFwd = ForwardRef("File") FolderFwd = ForwardRef("Folder") @@ -31,20 +30,6 @@ def pack(self, stream: BinaryIO, value: T) -> int: raise NotImplementedError -class StorageType(int, Enum): - Store = 0 - BufferCompress = 1 - StreamCompress = 2 - - -class VerificationType(int, Enum): - None_ = 0 # unknown real values, assuming incremental - CRC = 1 # unknown real values, assuming incremental - CRCBlocks = 2 # unknown real values, assuming incremental - MD5Blocks = 3 # unknown real values, assuming incremental - SHA1Blocks = 4 # unknown real values, assuming incremental - - @runtime_checkable class IOPathable(Protocol): @property diff --git a/src/relic/sga/v2/_serializers.py b/src/relic/sga/v2/_serializers.py index 5cf7d38..89e2d99 100644 --- a/src/relic/sga/v2/_serializers.py +++ b/src/relic/sga/v2/_serializers.py @@ -6,9 +6,10 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import FileDefABC as FileDef, Archive +from relic.sga.error import VersionMismatchError from relic.sga.v2 import core -from relic.sga.core import MagicWord, Version -from relic.sga.protocols import StreamSerializer, StorageType +from relic.sga._core import MagicWord, Version, StorageType +from relic.sga.protocols import StreamSerializer folder_layout = Struct(" Archive: MagicWord.read_magic_word(stream) version = Version.unpack(stream) - version.assert_version_matches(self.version) + if 
version != self.version: + raise VersionMismatchError(version,self.version) name: bytes file_md5, name, header_md5, header_size, data_pos = self.layout.unpack_stream(stream) diff --git a/src/relic/sga/v2/core.py b/src/relic/sga/v2/core.py index 69f600d..69834e3 100644 --- a/src/relic/sga/v2/core.py +++ b/src/relic/sga/v2/core.py @@ -4,7 +4,7 @@ from typing import Optional, BinaryIO from relic.sga import _abc -from relic.sga.core import Version +from relic.sga.error import Version version = Version(2) diff --git a/src/relic/sga/v5.py b/src/relic/sga/v5.py deleted file mode 100644 index 49211b8..0000000 --- a/src/relic/sga/v5.py +++ /dev/null @@ -1,149 +0,0 @@ -# from __future__ import annotations -# -# import hashlib -# from dataclasses import dataclass -# from datetime import datetime, timezone -# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -# -# from serialization_tools.size import KiB -# from serialization_tools.structx import Struct -# -# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC, MD5MismatchError -# from relic.sga.vX import APIvX -# -# version = Version(5) -# -# -# class _ToCPtrs(ToCPtrsABC): -# LAYOUT = ToCPtrsABC.LAYOUT_UINT16 -# -# -# class _DriveDef(DriveDefABC): -# LAYOUT = DriveDefABC.LAYOUT_UINT16 -# -# -# class _FolderDef(FolderDefABC): -# LAYOUT = FolderDefABC.LAYOUT_UINT16 -# -# -# @dataclass -# class FileDef(FileDefABC): -# LAYOUT = Struct("<5I 2B") -# # v7 Specific data -# modified: datetime # Unix EPOCH -# verification_type: FileVerificationType -# -# @classmethod -# def unpack(cls, stream: BinaryIO): -# # print(stream.tell()) -# name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val = cls.LAYOUT.unpack_stream(stream) -# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) -# storage_type = FileStorageType(storage_type_val) -# verification_type = FileVerificationType(verification_type_val) -# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type) -# -# -# @dataclass -# class FileMeta(FileMetaABC): -# modified: datetime -# verification: FileVerificationType -# storage: FileStorageType -# -# -# class File(FileABC): -# meta: FileMeta -# -# -# @dataclass -# class Folder(FolderABC): -# folders: List[Folder] -# files: List[File] -# -# -# class Drive(DriveABC): -# folders: List[Folder] -# files: List[File] -# -# -# @dataclass -# class ArchiveMeta(ArchiveMetaABC): -# file_md5: bytes -# header_md5: bytes -# blob_ptr: BlobPtrs # Cached for MD5 -# unk_a: int -# FILE_MD5_EIGEN: ClassVar = b"E01519D6-2DB7-4640-AF54-0A23319C56C3" -# HEADER_MD5_EIGEN: ClassVar = b"DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF" -# -# @staticmethod -# def _validate_md5(stream: BinaryIO, start: int, size: Optional[int], eigen: bytes, expected: bytes): -# _BUF_SIZE = 256 * KiB -# hasher = hashlib.md5(eigen) -# stream.seek(start) -# if size is None: -# while True: -# buffer = stream.read(_BUF_SIZE) -# hasher.update(buffer) -# if len(buffer) != _BUF_SIZE: -# break -# else: -# read = 0 -# while read < size: -# buffer = stream.read(min(_BUF_SIZE, size - read)) -# read += len(buffer) -# hasher.update(buffer) -# md5 = bytes.fromhex(hasher.hexdigest()) -# if md5 != expected: -# raise MD5MismatchError(md5, expected) -# -# def validate_file_md5(self, stream: BinaryIO): -# self._validate_md5(stream, 
self.blob_ptr.header_pos, None, self.FILE_MD5_EIGEN, self.file_md5) -# -# def validate_header_md5(self, stream: BinaryIO): -# self._validate_md5(stream, self.blob_ptr.header_pos, self.blob_ptr.header_size, self.HEADER_MD5_EIGEN, self.header_md5) -# -# -# class Archive(ArchiveABC): -# meta: ArchiveMeta -# drives: List[Drive] # typing -# -# TOC_PTRS = _ToCPtrs -# VDRIVE_DEF = _DriveDef -# FOLDER_DEF = _FolderDef -# FILE_DEF = FileDef -# VERSION = version -# META_PREFIX_LAYOUT = Struct("<16s 128s 16s 6I") -# -# _UNIQUE_UNKS: ClassVar = set() # For Analysis -# -# @classmethod -# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): -# files = [] -# for f_def in file_defs: -# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type) # TODO handle hash -# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) -# file = File(names[f_def.name_rel_pos], meta, None, sparse) -# files.append(file) -# return files -# -# @classmethod -# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: -# encoded_name: bytes -# file_md5, encoded_name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = cls.META_PREFIX_LAYOUT.unpack_stream(stream) -# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") -# assert RSV_1 == 1, RSV_1 -# assert RSV_0 == 0, RSV_0 -# # header_pos = stream.tell() -# stream.seek(header_pos) -# toc_ptrs = cls.TOC_PTRS.unpack(stream) -# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, None) -# meta = ArchiveMeta(file_md5, header_md5, blob_ptrs, unk_a) -# cls._UNIQUE_UNKS.add(unk_a) -# return decoded_name, meta, blob_ptrs, toc_ptrs -# -# -# class API(APIvX): -# version = version -# Archive = Archive -# File = File -# Folder = Folder -# Drive = Drive diff --git a/src/relic/sga/v5/_serializers.py b/src/relic/sga/v5/_serializers.py index 163a722..9878c14 100644 --- a/src/relic/sga/v5/_serializers.py +++ b/src/relic/sga/v5/_serializers.py @@ -7,8 +7,9 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import Archive -from relic.sga.core import MagicWord, Version, MismatchError -from relic.sga.protocols import StreamSerializer, StorageType, VerificationType +from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.protocols import StreamSerializer +from relic.sga._core import StorageType, VerificationType, MagicWord, Version from relic.sga.v5 import core folder_layout = Struct(" Archive: MagicWord.read_magic_word(stream) version = Version.unpack(stream) - version.assert_version_matches(self.version) + if version != self.version: + raise VersionMismatchError(version,self.version) name: bytes file_md5, name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = self.layout.unpack_stream(stream) @@ -94,4 +96,4 @@ def __init__(self): self.FileDef = file_serializer self.TocHeader = toc_header_serializer self.version = core.version - self.layout = Struct("<16s 128s 16s 2I") + self.layout = Struct("<16s 128s 16s 6I") diff --git a/src/relic/sga/v5/core.py b/src/relic/sga/v5/core.py index 9f77fd6..f9dccef 100644 --- a/src/relic/sga/v5/core.py +++ b/src/relic/sga/v5/core.py @@ -6,10 +6,10 @@ from relic.sga import _abc from relic.sga._abc import FileDefABC -from relic.sga.core import Version -from relic.sga.protocols import VerificationType +from relic.sga.error import Version +from relic.sga._core import VerificationType -version = Version(2) +version = Version(5) @dataclass 
diff --git a/src/relic/sga/v7.py b/src/relic/sga/v7.py deleted file mode 100644 index a6308e4..0000000 --- a/src/relic/sga/v7.py +++ /dev/null @@ -1,124 +0,0 @@ -# from __future__ import annotations -# from dataclasses import dataclass -# from datetime import datetime, timezone -# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -# -# from serialization_tools.structx import Struct -# -# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC -# from relic.sga.vX import APIvX -# -# version = Version(7) -# -# -# class _ToCPtrs(ToCPtrsABC): -# LAYOUT = ToCPtrsABC.LAYOUT_UINT32 -# -# -# class _DriveDef(DriveDefABC): -# LAYOUT = DriveDefABC.LAYOUT_UINT32 -# -# -# class _FolderDef(FolderDefABC): -# LAYOUT = FolderDefABC.LAYOUT_UINT32 -# -# -# @dataclass -# class FileDef(FileDefABC): -# LAYOUT = Struct("<5I 2B 2I") -# # v7 Specific data -# modified: datetime # Unix EPOCH -# verification_type: FileVerificationType -# crc: int -# hash_pos: int -# -# @classmethod -# def unpack(cls, stream: BinaryIO): -# # print(stream.tell()) -# name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = cls.LAYOUT.unpack_stream(stream) -# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) -# storage_type = FileStorageType(storage_type_val) -# verification_type = FileVerificationType(verification_type_val) -# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) -# -# -# @dataclass -# class FileMeta(FileMetaABC): -# modified: datetime -# verification: FileVerificationType -# storage: FileStorageType -# crc: int -# hash: bytes -# -# -# class File(FileABC): -# meta: FileMeta -# -# -# @dataclass -# class Folder(FolderABC): -# folders: List[Folder] -# files: List[File] -# -# -# class Drive(DriveABC): -# folders: List[Folder] -# files: List[File] -# -# -# @dataclass -# class ArchiveMeta(ArchiveMetaABC): -# LAYOUT: ClassVar = Struct("<2I") -# unk_a: int -# block_size: int -# -# @classmethod -# def unpack(cls, stream): -# layout = cls.LAYOUT -# args = layout.unpack_stream(stream) -# return cls(*args) -# -# def pack(self, stream): -# layout = self.LAYOUT -# args = self.unk_a, self.block_size -# return layout.pack_stream(stream, *args) -# -# -# class Archive(ArchiveABC): -# drives: List[Drive] # typing -# TOC_PTRS = _ToCPtrs -# VDRIVE_DEF = _DriveDef -# FOLDER_DEF = _FolderDef -# FILE_DEF = FileDef -# VERSION = Version(7) -# META_PREFIX_LAYOUT = Struct("<128s 3I") -# -# @classmethod -# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): -# files = [] -# for f_def in file_defs: -# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash -# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) -# file = File(names[f_def.name_rel_pos], meta, None, sparse) -# files.append(file) -# return files -# -# @classmethod -# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: -# encoded_name: bytes -# encoded_name, header_size, data_pos, RSV_1 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) -# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") -# assert RSV_1 == 1 -# header_pos = stream.tell() -# toc_ptrs = 
cls.TOC_PTRS.unpack(stream) -# meta = ArchiveMeta.unpack(stream) -# blob_ptrs = BlobPtrs(header_pos, None, data_pos, None) -# return decoded_name, meta, blob_ptrs, toc_ptrs -# -# -# class API(APIvX): -# version = version -# Archive = Archive -# File = File -# Folder = Folder -# Drive = Drive diff --git a/src/relic/sga/v7/_serializers.py b/src/relic/sga/v7/_serializers.py index ce4526b..a8dc748 100644 --- a/src/relic/sga/v7/_serializers.py +++ b/src/relic/sga/v7/_serializers.py @@ -7,8 +7,9 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import Archive -from relic.sga.core import MagicWord, Version, MismatchError -from relic.sga.protocols import StreamSerializer, StorageType, VerificationType +from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.protocols import StreamSerializer +from relic.sga._core import StorageType, VerificationType, MagicWord, Version from relic.sga.v7 import core folder_layout = Struct(" int: modified: int = int(value.modified.timestamp()) storage_type = value.storage_type.value # convert enum to value - verification_type = value.verification.value # convert enum to value + verification_type = value.verification.value # convert enum to value args = value.name_pos, value.data_pos, value.length_on_disk, value.length_in_archive, modified, verification_type, storage_type, value.crc, value.hash_pos return self.layout.pack_stream(stream, *args) @@ -53,7 +54,9 @@ class APISerializers(_abc.APISerializer): def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> Archive: MagicWord.read_magic_word(stream) version = Version.unpack(stream) - version.assert_version_matches(self.version) + if version != self.version: + raise VersionMismatchError(version, self.version) + name: bytes name, header_size, data_pos, RSV_1 = self.layout.unpack_stream(stream) @@ -77,7 +80,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> file._lazy_info = None name: str = name.decode("utf-16-le").rstrip("\0") - metadata = core.ArchiveMetadata(unk_a,block_size) + metadata = core.ArchiveMetadata(unk_a, block_size) return Archive(name, metadata, drives) diff --git a/src/relic/sga/v7/core.py b/src/relic/sga/v7/core.py index bc252ad..a979c3f 100644 --- a/src/relic/sga/v7/core.py +++ b/src/relic/sga/v7/core.py @@ -2,14 +2,13 @@ from dataclasses import dataclass from datetime import datetime -from typing import Optional, BinaryIO from relic.sga import _abc from relic.sga._abc import FileDefABC -from relic.sga.core import Version -from relic.sga.protocols import VerificationType +from relic.sga.error import Version +from relic.sga._core import VerificationType -version = Version(2) +version = Version(7) @dataclass diff --git a/src/relic/sga/v9.py b/src/relic/sga/v9.py deleted file mode 100644 index aef0972..0000000 --- a/src/relic/sga/v9.py +++ /dev/null @@ -1,116 +0,0 @@ -# from __future__ import annotations -# from dataclasses import dataclass -# from datetime import datetime, timezone -# from typing import BinaryIO, Tuple, List, Dict, ClassVar, Optional -# -# from serialization_tools.structx import Struct -# -# from relic.sga.core import ArchiveABC, ArchiveMetaABC, BlobPtrs, FileDefABC, ToCPtrsABC, DriveDefABC, FolderDefABC, FileVerificationType, FileStorageType, FileMetaABC, FileSparseInfo, FileABC, FolderABC, Version, DriveABC -# from relic.sga.vX import APIvX -# -# version = Version(9) -# -# class _ToCPtrs(ToCPtrsABC): -# LAYOUT = ToCPtrsABC.LAYOUT_UINT32 -# -# -# class _DriveDef(DriveDefABC): -# LAYOUT 
= DriveDefABC.LAYOUT_UINT32 -# -# -# class _FolderDef(FolderDefABC): -# LAYOUT = FolderDefABC.LAYOUT_UINT32 -# -# -# @dataclass -# class FileDef(FileDefABC): -# LAYOUT = Struct("<2I Q 3I 2B I") -# # v7 Specific data -# modified: datetime # Unix EPOCH -# verification_type: FileVerificationType -# crc: int -# hash_pos: int -# -# @classmethod -# def unpack(cls, stream: BinaryIO): -# name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds,verification_type_val, storage_type_val, crc = cls.LAYOUT.unpack_stream(stream) -# modified = datetime.fromtimestamp(modified_seconds, timezone.utc) -# storage_type = FileStorageType(storage_type_val) -# verification_type = FileVerificationType(verification_type_val) -# return cls(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc, hash_pos) -# -# -# @dataclass -# class FileMeta(FileMetaABC): -# modified: datetime -# verification: FileVerificationType -# storage: FileStorageType -# crc: int -# hash: bytes -# -# -# class File(FileABC): -# meta: FileMeta -# -# -# @dataclass -# class Folder(FolderABC): -# folders: List[Folder] -# files: List[File] -# -# -# class Drive(DriveABC): -# folders: List[Folder] -# files: List[File] -# -# -# @dataclass -# class ArchiveMeta(ArchiveMetaABC): -# sha_256: bytes -# unk_a: int -# unk_b: int -# block_size: int -# -# -# class Archive(ArchiveABC): -# drives: List[Drive] # typing -# TOC_PTRS = _ToCPtrs -# VDRIVE_DEF = _DriveDef -# FOLDER_DEF = _FolderDef -# FILE_DEF = FileDef -# VERSION = version -# META_PREFIX_LAYOUT = Struct("<128s QIQQ I 256s") -# META_POSTFIX_LAYOUT = Struct("<3I") -# NAME_BUFFER_USES_COUNT = False -# -# @classmethod -# def _assemble_files(cls, file_defs: List[FileDef], names: Dict[int, str], data_pos: int): -# files = [] -# for f_def in file_defs: -# meta = FileMeta(f_def.storage_type, f_def.modified, f_def.verification_type, f_def.crc, None) # TODO handle hash -# sparse = FileSparseInfo(f_def.storage_type, data_pos + f_def.data_rel_pos, f_def.length, f_def.store_length) -# file = File(names[f_def.name_rel_pos], meta, None, sparse) -# files.append(file) -# return files -# -# @classmethod -# def _unpack_meta(cls, stream: BinaryIO) -> Tuple[str, ArchiveMetaABC, BlobPtrs, ToCPtrsABC]: -# encoded_name: bytes -# encoded_name, header_pos, header_size, data_pos, data_size, RSV_1, sha_256 = cls.META_PREFIX_LAYOUT.unpack_stream(stream) -# decoded_name = encoded_name.decode("utf-16-le").rstrip("\0") -# assert RSV_1 == 1, RSV_1 -# stream.seek(header_pos) -# toc_ptrs = cls.TOC_PTRS.unpack(stream) -# unk_a, unk_b, block_size = cls.META_POSTFIX_LAYOUT.unpack_stream(stream) -# meta = ArchiveMeta(sha_256,unk_a,unk_b, block_size) -# blob_ptrs = BlobPtrs(header_pos, header_size, data_pos, data_size) -# return decoded_name, meta, blob_ptrs, toc_ptrs -# -# def _pack_meta(self,): -# -# class API(APIvX): -# version = version -# Archive = Archive -# File = File -# Folder = Folder -# Drive = Drive \ No newline at end of file diff --git a/src/relic/sga/v9/__init__.py b/src/relic/sga/v9/__init__.py new file mode 100644 index 0000000..519fe96 --- /dev/null +++ b/src/relic/sga/v9/__init__.py @@ -0,0 +1,22 @@ +from relic.sga import _abc +from relic.sga.v9._serializers import APISerializers +from relic.sga.v9.core import Archive, Drive, Folder, File, ArchiveMetadata, version + + +def _create_api(): + serializer = APISerializers() + api = _abc.API(version, Archive, Drive, Folder, File, serializer) + return api + + +API = _create_api() + +__all__ = [ + "Archive", + "Drive", 
+ "Folder", + "File", + "API", + "version", + "ArchiveMetadata" +] diff --git a/src/relic/sga/v9/_serializers.py b/src/relic/sga/v9/_serializers.py new file mode 100644 index 0000000..35baf37 --- /dev/null +++ b/src/relic/sga/v9/_serializers.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from typing import BinaryIO, Optional + +from serialization_tools.structx import Struct + +from relic.sga import _abc, _serializers as _s +from relic.sga._abc import Archive +from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.protocols import StreamSerializer +from relic.sga._core import StorageType, VerificationType, Version, MagicWord +from relic.sga.v9 import core + +folder_layout = Struct("<5I") +folder_serializer = _s.FolderDefSerializer(folder_layout) + +drive_layout = Struct("<64s 64s 5I") +drive_serializer = _s.DriveDefSerializer(drive_layout) + +file_layout = Struct("<2I Q 3I 2B I") + + +class FileDefSerializer(StreamSerializer[core.FileDef]): + def __init__(self, layout: Struct): + self.layout = layout + + def unpack(self, stream: BinaryIO): + storage_type: int + verification_type: int + + name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds,verification_type, storage_type, crc = self.layout.unpack_stream(stream) + + modified = datetime.fromtimestamp(modified_seconds, timezone.utc) + storage_type: StorageType = StorageType(storage_type) + verification_type: VerificationType = VerificationType(verification_type) + + return core.FileDef(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type,crc, hash_pos) + + def pack(self, stream: BinaryIO, value: core.FileDef) -> int: + modified: int = int(value.modified.timestamp()) + storage_type = value.storage_type.value # convert enum to value + verification_type = value.verification.value # convert enum to value + args = value.name_pos, value.hash_pos, value.data_pos, value.length_on_disk, value.length_in_archive, storage_type, modified, verification_type, value.crc + return self.layout.pack_stream(stream, *args) + + +file_serializer = FileDefSerializer(file_layout) +toc_layout = Struct("<8I") +toc_header_serializer = _s.TocHeaderSerializer(toc_layout) + + +class APISerializers(_abc.APISerializer): + def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> Archive: + MagicWord.read_magic_word(stream) + version = Version.unpack(stream) + if version != self.version: + raise VersionMismatchError(version,self.version) + + + name: bytes + name, header_pos, header_size, data_pos, data_pos, RSV_1, sha_256 = self.layout.unpack_stream(stream) + if RSV_1 != 1: + raise MismatchError("Reserved Field", RSV_1, 1) + # header_pos = stream.tell() + stream.seek(header_pos) + toc_header = self.TocHeader.unpack(stream) + unk_a, unk_b, block_size = self.metadata_layout.unpack_stream(stream) + drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef) + names = _s._read_toc_names_as_size(stream, toc_header.name_info, header_pos) + drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream) + + if not lazy: + for file in files: + lazy_info: Optional[_abc._FileLazyInfo] = file._lazy_info + if lazy_info is None: + raise Exception("API read files, but failed to create lazy info!") + else: + file.data = lazy_info.read(decompress) + file._lazy_info = None + + name: str = 
name.rstrip(b"").decode("utf-16-le") + metadata = core.ArchiveMetadata(sha_256, unk_a, unk_b, block_size) + + return Archive(name, metadata, drives) + + def write(self, stream: BinaryIO, archive: Archive) -> int: + raise NotImplementedError + + def __init__(self): + self.DriveDef = drive_serializer + self.FolderDef = folder_serializer + self.FileDef = file_serializer + self.TocHeader = toc_header_serializer + self.version = core.version + self.layout = Struct("<128s QIQQ I 256s") + self.metadata_layout = Struct("<3I") diff --git a/src/relic/sga/v9/core.py b/src/relic/sga/v9/core.py new file mode 100644 index 0000000..9af0a5a --- /dev/null +++ b/src/relic/sga/v9/core.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from typing import Optional, BinaryIO + +from relic.sga import _abc +from relic.sga._abc import FileDefABC +from relic.sga.error import Version +from relic.sga._core import VerificationType + +version = Version(9) + + +@dataclass +class ArchiveMetadata: + sha_256:bytes + unk_a: int + unk_b: int + block_size:int + + +@dataclass +class FileDef(FileDefABC): + modified: datetime + verification: VerificationType + crc: int + hash_pos: int + + +@dataclass +class FileMetadata: + modified: datetime + verification: VerificationType + crc: int + hash_pos: int + + +Archive = _abc.Archive[ArchiveMetadata] +Folder = _abc.Folder +File = _abc.File[FileMetadata] +Drive = _abc.Drive diff --git a/src/relic/sga/vX.py b/src/relic/sga/vX.py deleted file mode 100644 index 6f2e03b..0000000 --- a/src/relic/sga/vX.py +++ /dev/null @@ -1,36 +0,0 @@ -# from types import ModuleType -# from typing import Type, Protocol -# -# from relic.sga.protocols import Archive, File, Folder, Drive -# -# -# class APIvX(Protocol): -# """ -# Allows us to have a TYPED OBJECT with required types for each version -# -# """ -# -# version: Version -# # Archive -# Archive: Type[Archive] -# # ArchiveHeader: Type[protocols.ArchiveHeader] -# # # Table Of Contents -# # ArchiveToCPtr: Type[abc_.ArchiveToCPtrABC] -# # ArchiveTableOfContentsHeaders: Type[abc_.ArchiveTableOfContentsHeadersABC] -# # # Files -# # FileHeader: Type[abc_.FileHeaderABC] -# File: Type[FileABC] -# # # Folders -# # FolderHeader: Type[abc_.FolderHeaderABC] -# Folder: Type[FolderABC] -# # # VDrive -# # VirtualDriveHeader: Type[abc_.VirtualDriveHeaderABC] -# Drive: Type[DriveABC] -# -# -# """Modules implementing vX should define all of the following attributes""" -# required_attrs = APIvX.__annotations__.keys() -# -# -# def is_module_api(module: ModuleType): -# return all(hasattr(module, attr) for attr in required_attrs) diff --git a/src/relic/sga/writer_tester.py b/src/relic/sga/writer_tester.py deleted file mode 100644 index 28f1c92..0000000 --- a/src/relic/sga/writer_tester.py +++ /dev/null @@ -1,36 +0,0 @@ -# # To write an Archive -# import zlib -# from abc import abstractmethod -# from io import BytesIO -# from typing import List, BinaryIO, Dict, Tuple, Type, Optional -# -# from relic.sga import v9 -# from relic.sga.core import DriveABC, DriveDefABC, FolderDefABC, ArchivePathable, FileDefABC, FolderABC, FileABC, FileStorageType, FileMetaABC, ArchiveABC -# -# -# -# -# if __name__ == "__main__": -# a = v9.Archive("Test", None, []) -# drive = DriveABC([], [], "data", "Test Archvie") -# a.drives = [drive] -# drive_folder = FolderABC("drive-folder-a", [], [], _parent_path=drive) -# drive_file = FileABC("drive-file-buffer-comp-b.raw", FileMetaABC(FileStorageType.BufferCompress), b"This is a 
test 'buffer compress' file!", _parent_path=drive) -# drive.folders = [drive_folder] -# drive.files = [drive_file] -# -# drive_folder_folder = FolderABC("drive-folder-folder-c", [], [], _parent_path=drive_folder) -# drive_folder_file_d = FileABC("drive-folder-file-stream-comp-d.raw", FileMetaABC(FileStorageType.StreamCompress), b"This is a test 'stream compress' file!", _parent_path=drive_folder) -# drive_folder_file_e = FileABC("drive-folder-file-store-e.raw", FileMetaABC(FileStorageType.StreamCompress), b"This is a test 'stream compress' file!", _parent_path=drive_folder) -# drive_folder.folders = [drive_folder_folder] -# drive_folder.files = [drive_folder_file_d, drive_folder_file_e] -# -# with BytesIO() as name_stream: -# with BytesIO() as data_stream: -# writer = ArchiveFlattener(name_stream, data_stream) -# writer.flatten_archive(a) -# name_stream.seek(0) -# data_stream.seek(0) -# names = name_stream.read() -# data = data_stream.read() -# _ = None diff --git a/src/scripts/universal/sga/common.py b/src/scripts/universal/sga/common.py index 2a12f2e..887582a 100644 --- a/src/scripts/universal/sga/common.py +++ b/src/scripts/universal/sga/common.py @@ -5,7 +5,7 @@ from serialization_tools.walkutil import blacklisted -from relic.sga.core import MagicWord +from relic.sga.error import MagicWord from scripts.universal.common import print_error, print_wrote, print_reading, PrintOptions, SharedExtractorParser SharedSgaParser = argparse.ArgumentParser(parents=[SharedExtractorParser], add_help=False) diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py index fd619c0..1faf176 100644 --- a/src/scripts/universal/sga/unpack.py +++ b/src/scripts/universal/sga/unpack.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Dict -from relic.sga.core import FileABC +from relic.sga.error import FileABC from relic.sga.apis import read_archive from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser from scripts.universal.sga.common import get_runner diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py index ba01a51..65c97e1 100644 --- a/tests/relic/sga/archive/test_archive.py +++ b/tests/relic/sga/archive/test_archive.py @@ -1,124 +1,210 @@ +import json from abc import abstractmethod from io import BytesIO +from pathlib import Path +from typing import Union, Iterable, Tuple import pytest +import serialization_tools.magic -from relic.sga.core import ArchiveWalk, ArchiveABC as Archive -from tests.helpers import TF +from relic.sga import v2, v5, v9, MagicWord, Version, v7 +from relic.sga.protocols import API from tests.relic.sga.datagen import DowII, DowI, DowIII -def _ARCHIVE_WALK_SAMPLE(a: Archive) -> ArchiveWalk: - d = a.drives[0] - sfs = d.folders - dfs = d.files - yield d, None, sfs, dfs - yield d, sfs[0], [], sfs[0].files +# def _ARCHIVE_WALK_SAMPLE(a: Archive) -> ArchiveWalk: +# d = a.drives[0] +# sfs = d.folders +# dfs = d.files +# yield d, None, sfs, dfs +# yield d, sfs[0], [], sfs[0].files -class ArchiveTests: - def assert_equal(self, expected: Archive, result: Archive, sparse: bool): - assert expected.meta == result.meta - if sparse: - assert result._sparse - # TODO - +class APITests: @abstractmethod - def test_walk(self, archive: Archive, expected: ArchiveWalk): - archive_walk = archive.walk() - for (a_vdrive, a_folder, a_folders, a_files), (e_vdrive, e_folder, e_folders, e_files) in zip(archive_walk, expected): - assert a_vdrive == e_vdrive - assert a_folder == e_folder - 
assert a_folders == e_folders - assert a_files == e_files + def test_read(self, buffer: Union[bytes, str], api: API): + if isinstance(buffer, str): + with open(buffer, "rb") as stream: + api.read(stream, True) + else: + with BytesIO(buffer) as stream: + api.read(stream, True) + # def assert_equal(self, expected: Archive, result: Archive, sparse: bool): + # assert expected.meta == result.meta + # if sparse: + # assert result._sparse + # # TODO + + # @abstractmethod + # def test_walk(self, archive: Archive, expected: ArchiveWalk): + # archive_walk = archive.walk() + # for (a_vdrive, a_folder, a_folders, a_files), (e_vdrive, e_folder, e_folders, e_files) in zip(archive_walk, expected): + # assert a_vdrive == e_vdrive + # assert a_folder == e_folder + # assert a_folders == e_folders + # assert a_files == e_files + + # @abstractmethod + # def test_unpack(self, stream_data: bytes, expected: Archive): + # for sparse in TF: + # with BytesIO(stream_data) as stream: + # archive = expected.__class__.unpack(stream, expected.header, sparse) + # assert expected.__class__ == archive.__class__ + # self.assert_equal(expected, archive, sparse) + # + # @abstractmethod + # def test_pack(self, archive: Archive, expected: bytes): + # for write_magic in TF: + # try: + # with BytesIO() as stream: + # packed = archive.pack(stream, write_magic) + # except NotImplementedError: + # pass # Currently not implemented; we'll expect this for now + # else: + # assert expected == packed + + +def scan_directory(root_dir: str, desired_version: Version) -> Iterable[str]: + root_directory = Path(root_dir) + for path_object in root_directory.glob('**/*.sga'): + with path_object.open("rb") as stream: + if not MagicWord.check_magic_word(stream, advance=True): + continue + version = Version.unpack(stream) + if version != desired_version: + continue + yield str(path_object) - @abstractmethod - def test_unpack(self, stream_data: bytes, expected: Archive): - for sparse in TF: - with BytesIO(stream_data) as stream: - archive = expected.__class__.unpack(stream, expected.header, sparse) - assert expected.__class__ == archive.__class__ - self.assert_equal(expected, archive, sparse) - @abstractmethod - def test_pack(self, archive: Archive, expected: bytes): - for write_magic in TF: - try: - with BytesIO() as stream: - packed = archive.pack(stream, write_magic) - except NotImplementedError: - pass # Currently not implemented; we'll expect this for now - else: - assert expected == packed +def fast_gen_dow1_archive(*args): + return None, DowI.gen_sample_archive_buffer(*args) + # return DowI.gen_sample_archive(*args),\ + # DowI.gen_sample_archive_buffer(*args) -def fast_gen_dow1_archive(*args): - return DowI.gen_sample_archive(*args), DowI.gen_sample_archive_buffer(*args) +def buffer_paramefy(files: Iterable[str]) -> Iterable[Tuple[str]]: + return [(_,) for _ in files] -DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") +try: + path = Path(__file__) + path = path.parent / "file_sources.json" + with path.open() as stream: + file_sources = json.load(stream) +except IOError as e: + file_sources = {} + +def _helper(src_key: str, version: Version): + try: + return buffer_paramefy(scan_directory(file_sources[src_key], version)) + except: + return tuple() -class TestArchiveV2(ArchiveTests): - @pytest.mark.parametrize(["stream_data", "expected"], - [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, 
expected: Archive): - super().test_unpack(stream_data, expected) - @pytest.mark.parametrize(["archive", "expected"], - [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): - super().test_pack(archive, expected) +v2Files = _helper("v2", v2.version) +v5Files = _helper("v5", v5.version) +v7Files = _helper("v7", v7.version) +v9Files = _helper("v9", v9.version) - @pytest.mark.parametrize(["archive", "expected"], - [(DOW1_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW1_ARCHIVE))]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): - super().test_walk(archive, expected) +DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") -def fast_gen_dow2_archive(*args): - return DowII.gen_sample_archive(*args), DowII.gen_sample_archive_buffer(*args) +class TestV2(APITests): + @pytest.fixture() + def api(self) -> API: + return v2.API + + @pytest.mark.parametrize(["buffer"], [(DOW1_ARCHIVE_PACKED,), *v2Files]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV2, self).test_read(buffer, api) + # @pytest.mark.parametrize(["stream_data", "expected"], + # [(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) + # def test_unpack(self, stream_data: bytes, expected: Archive): + # super().test_unpack(stream_data, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) + # def test_pack(self, archive: Archive, expected: bytes): + # super().test_pack(archive, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW1_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW1_ARCHIVE))]) + # def test_walk(self, archive: Archive, expected: ArchiveWalk): + # super().test_walk(archive, expected) -DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") +def fast_gen_dow2_archive(*args): + return None, DowII.gen_sample_archive_buffer(*args) + # return DowII.gen_sample_archive(*args),\ + # DowII.gen_sample_archive_buffer(*args) -class TestArchiveV5(ArchiveTests): - @pytest.mark.parametrize(["stream_data", "expected"], - [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, expected: Archive): - super().test_unpack(stream_data, expected) +DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") - @pytest.mark.parametrize(["archive", "expected"], - [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): - super().test_pack(archive, expected) - @pytest.mark.parametrize(["archive", "expected"], - [(DOW2_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW2_ARCHIVE))]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): - super().test_walk(archive, expected) +class TestV5(APITests): + @pytest.fixture() + def api(self) -> API: + return v5.API + + @pytest.mark.parametrize(["buffer"], [*v5Files, (DOW2_ARCHIVE_PACKED,)]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV5, self).test_read(buffer, api) + # @pytest.mark.parametrize(["stream_data", "expected"], + # [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) + # def test_unpack(self, stream_data: bytes, expected: Archive): + # super().test_unpack(stream_data, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) + # def test_pack(self, archive: Archive, 
expected: bytes): + # super().test_pack(archive, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW2_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW2_ARCHIVE))]) + # def test_walk(self, archive: Archive, expected: ArchiveWalk): + # super().test_walk(archive, expected) def fast_gen_dow3_archive(*args): - return DowIII.gen_sample_archive(*args), DowIII.gen_sample_archive_buffer(*args) + return None, DowIII.gen_sample_archive_buffer(*args) + # return DowIII.gen_sample_archive(*args), \ + # DowIII.gen_sample_archive_buffer(*args) DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED = fast_gen_dow3_archive("Dow3 Test Archive", "Tests", "Some Witty FileName.txt", b"NGL; I'm running out of dumb/clever test data.") -class TestArchiveV9(ArchiveTests): - @pytest.mark.parametrize(["stream_data", "expected"], - [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) - def test_unpack(self, stream_data: bytes, expected: Archive): - super().test_unpack(stream_data, expected) - - @pytest.mark.parametrize(["archive", "expected"], - [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) - def test_pack(self, archive: Archive, expected: bytes): - super().test_pack(archive, expected) - - @pytest.mark.parametrize(["archive", "expected"], - [(DOW3_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW3_ARCHIVE))]) - def test_walk(self, archive: Archive, expected: ArchiveWalk): - super().test_walk(archive, expected) +class TestV9(APITests): + @pytest.fixture() + def api(self) -> API: + return v9.API + + @pytest.mark.parametrize(["buffer"], [*v9Files, (DOW3_ARCHIVE_PACKED,)]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV9, self).test_read(buffer, api) + # @pytest.mark.parametrize(["stream_data", "expected"], + # [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) + # def test_unpack(self, stream_data: bytes, expected: Archive): + # super().test_unpack(stream_data, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) + # def test_pack(self, archive: Archive, expected: bytes): + # super().test_pack(archive, expected) + # + # @pytest.mark.parametrize(["archive", "expected"], + # [(DOW3_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW3_ARCHIVE))]) + # def test_walk(self, archive: Archive, expected: ArchiveWalk): + # super().test_walk(archive, expected) + + +class TestV7(APITests): + @pytest.fixture() + def api(self) -> API: + return v7.API + + @pytest.mark.parametrize(["buffer"], [*v7Files]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV7, self).test_read(buffer, api) diff --git a/tests/relic/sga/datagen.py b/tests/relic/sga/datagen.py index 6f04b41..34206bd 100644 --- a/tests/relic/sga/datagen.py +++ b/tests/relic/sga/datagen.py @@ -1,12 +1,49 @@ import hashlib -from typing import Tuple, Dict +from typing import Tuple, Dict, ClassVar -from serialization_tools.ioutil import WindowPtr, Ptr +from relic.sga._core import StorageType -from relic.sga.protocols import ArchiveHeader -from relic.sga_old.abc_old_ import FileABC, FolderABC, VirtualDriveABC, ArchiveTOC -from relic.sga_old import v2, v5, v9 -from relic.sga_old.common import ArchiveRange + +class VirtualDriveABC: + pass + + +class ArchiveHeader: + pass + + +class v9: + VirtualDriveHeader = None + FolderHeader: ClassVar = None + Archive: ClassVar = None + FileHeader: ClassVar = None + + +class v5: + VirtualDriveHeader = None + FolderHeader: ClassVar = None + Archive: ClassVar = None + FileHeader: ClassVar = None + + +class v2: + VirtualDriveHeader = None + FolderHeader: ClassVar = None + Archive: ClassVar = None + FileCompressionFlag: 
ClassVar = None + FileHeader: ClassVar = None + + +class FolderABC: + pass + + +class FileABC: + pass + + +class ArchiveTOC: + pass def encode_and_pad(v: str, byte_size: int, encoding: str) -> bytes: @@ -35,11 +72,12 @@ def splice_toc_offsets(vdrive: int, folders: int, files: int, names: int, offset class DowI: DEFAULT_CSUMS = (b"\x01\x02\0\x04\0\0\0\x08\0\0\0\0\0\0\0\0", b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") - VDRIVE_UNK = b"\xde\xad" # Arbitrary value + DEF_ROOT_FOLDER = ushort(0) # b"\xde\xad" # Arbitrary value @staticmethod def gen_archive_header(name: str, toc_size: int, data_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, toc_pos: int = 180) -> ArchiveHeader: - return v2.ArchiveHeader(name, WindowPtr(toc_pos, toc_size), WindowPtr(data_offset), csums) + raise TypeError("Not currently supported") + # return v2.ArchiveHeader(name, WindowPtr(toc_pos, toc_size), WindowPtr(data_offset), csums) @staticmethod def gen_archive_header_buffer(name: str, toc_size: int, data_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, magic: bytes = b"_ARCHIVE") -> bytes: @@ -50,16 +88,18 @@ def gen_archive_header_buffer(name: str, toc_size: int, data_offset: int, csums: return magic + version + csums[0] + encoded_name + csums[1] + encoded_toc_size + encoded_data_offset @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v2.VirtualDriveHeader: - return v2.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = DEF_ROOT_FOLDER) -> v2.VirtualDriveHeader: + raise TypeError("Not currently supported") + # return v2.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) @staticmethod - def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK): - return encode_and_pad(path, 64, "ascii") + encode_and_pad(name, 64, "ascii") + ushort(subfolder_offset) + ushort(subfolder_offset + subfolder_count) + ushort(file_offset) + ushort(file_count + file_offset) + unk + def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", root_folder: bytes = DEF_ROOT_FOLDER): + return encode_and_pad(path, 64, "ascii") + encode_and_pad(name, 64, "ascii") + ushort(subfolder_offset) + ushort(subfolder_offset + subfolder_count) + ushort(file_offset) + ushort(file_count + file_offset) + root_folder @staticmethod def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v2.FolderHeader: - return v2.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + raise TypeError("Not currently supported") + # return v2.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) @staticmethod def 
gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> bytes: @@ -67,25 +107,26 @@ def gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfol @staticmethod def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: v2.FileCompressionFlag = None) -> v2.FileHeader: - if comp_size is None: - comp_size = decomp_size - if comp_flag is None: - if comp_size != decomp_size: - comp_flag = v2.FileCompressionFlag.Compressed16 # IDK, just choose one - else: - comp_flag = v2.FileCompressionFlag.Decompressed - return v2.FileHeader(Ptr(name_offset), WindowPtr(data_offset, comp_size), decomp_size, comp_size, comp_flag) + raise TypeError("Not currently supported") + # if comp_size is None: + # comp_size = decomp_size + # if comp_flag is None: + # if comp_size != decomp_size: + # comp_flag = v2.FileCompressionFlag.Compressed16 # IDK, just choose one + # else: + # comp_flag = v2.FileCompressionFlag.Decompressed + # return v2.FileHeader(Ptr(name_offset), WindowPtr(data_offset, comp_size), decomp_size, comp_size, comp_flag) @staticmethod - def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: v2.FileCompressionFlag = None) -> bytes: + def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None, comp_flag: StorageType = None) -> bytes: if comp_size is None: comp_size = decomp_size if comp_flag is None: if comp_size != decomp_size: - comp_flag = v2.FileCompressionFlag.Compressed16 # IDK, just choose one + comp_flag = 32 # StorageType.StreamCompress # IDK, just choose one else: - comp_flag = v2.FileCompressionFlag.Decompressed - return uint(name_offset) + uint(comp_flag.value) + uint(data_offset) + uint(decomp_size) + uint(comp_size) + comp_flag = 0 # StorageType.Store + return uint(name_offset) + uint(comp_flag) + uint(data_offset) + uint(decomp_size) + uint(comp_size) @staticmethod def gen_name_buffer(*names: str, encoding: str = "ascii") -> Tuple[bytes, Dict[str, int]]: @@ -114,7 +155,8 @@ def gen_toc_ptr_buffer(vdrive: Tuple[int, int], folders: Tuple[int, int], files: @staticmethod def gen_toc(vdrive: VirtualDriveABC, folder: FolderABC, file: FileABC, names: Dict[int, str]) -> ArchiveTOC: - return ArchiveTOC([vdrive], [folder], [file], names) + raise TypeError("Not currently supported") + # return ArchiveTOC([vdrive], [folder], [file], names) @classmethod def gen_archive_buffer(cls, archive_name: str, toc_ptrs: bytes, toc: bytes, data: bytes, magic: bytes = "_ARCHIVE") -> bytes: @@ -147,55 +189,57 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi @classmethod def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> v2.Archive: - def dirty_toc_hack(): - name_buf, name_offsets = cls.gen_name_buffer(folder, file) - vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) - folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) - file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) - toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) - toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) - return cls.gen_toc_ptr_buffer(*toc_ptrs) + toc_buf - - toc_buf = dirty_toc_hack() - - def dirty_csum_hack(): - EIGENS = 
("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - - def gen_csum(buffer: bytes, eigen: bytes) -> bytes: - hasher = hashlib.md5(eigen) - hasher.update(buffer) - return bytes.fromhex(hasher.hexdigest()) - - csum2 = gen_csum(toc_buf, EIGENS[1]) - toc_and_data = toc_buf + file_uncomp_data - csum1 = gen_csum(toc_and_data, EIGENS[0]) - return csum1, csum2 - - csums = dirty_csum_hack() - - _, name_offsets = cls.gen_name_buffer(folder, file) - vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) - folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) - file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = v2.File(file_h, file, file_uncomp_data, True) - folder_ = v2.Folder(folder_h, folder, [], [file_]) - vdrive_ = v2.VirtualDrive(vdrive_h, [folder_], [file_]) - folder_.parent_drive = file_.parent_drive = vdrive_ - file_.parent_folder = folder_ - header = cls.gen_archive_header(archive_name, len(toc_buf), len(toc_buf) + toc_pos, csums, toc_pos) - return v2.Archive(header, [vdrive_], False) + raise TypeError("Currently not supported") + # def dirty_toc_hack(): + # name_buf, name_offsets = cls.gen_name_buffer(folder, file) + # vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) + # folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) + # file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) + # toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) + # toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) + # return cls.gen_toc_ptr_buffer(*toc_ptrs) + toc_buf + # + # toc_buf = dirty_toc_hack() + # + # def dirty_csum_hack(): + # EIGENS = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) + # + # def gen_csum(buffer: bytes, eigen: bytes) -> bytes: + # hasher = hashlib.md5(eigen) + # hasher.update(buffer) + # return bytes.fromhex(hasher.hexdigest()) + # + # csum2 = gen_csum(toc_buf, EIGENS[1]) + # toc_and_data = toc_buf + file_uncomp_data + # csum1 = gen_csum(toc_and_data, EIGENS[0]) + # return csum1, csum2 + # + # csums = dirty_csum_hack() + # + # _, name_offsets = cls.gen_name_buffer(folder, file) + # vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) + # folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) + # file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) + # file_ = v2.File(file_h, file, file_uncomp_data, True) + # folder_ = v2.Folder(folder_h, folder, [], [file_]) + # vdrive_ = v2.VirtualDrive(vdrive_h, [folder_], [file_]) + # folder_.parent_drive = file_.parent_drive = vdrive_ + # file_.parent_folder = folder_ + # header = cls.gen_archive_header(archive_name, len(toc_buf), len(toc_buf) + toc_pos, csums, toc_pos) + # return v2.Archive(header, [vdrive_], False) class DowII: DEFAULT_CSUMS = (b"\x01\x02\0\x04\0\0\0\x08\0\0\0\0\0\0\0\0", b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f") - VDRIVE_UNK = b"\xde\xad" # Arbitrary value + DEF_ROOT_FOLDER = ushort(0) # b"\xde\xad" # Arbitrary value ARCHIVE_HEADER_UNK = bytes.fromhex("4d41dFFF") # F in place of unknowns ARCHIVE_HEADER_UNK_INT = int.from_bytes(ARCHIVE_HEADER_UNK, byteorder="little", signed=False) # F in place of unknowns ARCHIVE_HEADER_SIZE = 196 @classmethod def gen_archive_header(cls, name: str, toc_size: int, data_offset: int, toc_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS) 
-> ArchiveHeader: - return v5.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset), csums, cls.ARCHIVE_HEADER_UNK_INT) + raise TypeError("Not currently supported") + # return v5.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset), csums, cls.ARCHIVE_HEADER_UNK_INT) @classmethod def gen_archive_header_buffer(cls, name: str, toc_size: int, data_offset: int, toc_offset: int, csums: Tuple[bytes, bytes] = DEFAULT_CSUMS, magic: bytes = b"_ARCHIVE") -> bytes: @@ -207,21 +251,24 @@ def gen_archive_header_buffer(cls, name: str, toc_size: int, data_offset: int, t return magic + version + csums[0] + encoded_name + csums[1] + encoded_toc_size + encoded_data_offset + encoded_toc_offset + uint(1) + uint(0) + cls.ARCHIVE_HEADER_UNK @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v5.VirtualDriveHeader: - return v5.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", root_folder: bytes = DEF_ROOT_FOLDER) -> v5.VirtualDriveHeader: + raise TypeError("Not currently supported") + # return v5.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) gen_vdrive_header_buffer = DowI.gen_vdrive_header_buffer # Same exact layout; @staticmethod def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v5.FolderHeader: - return v5.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + raise TypeError("Not currently supported") + # return v5.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) gen_folder_header_buffer = DowI.gen_folder_header_buffer # Same exact layout; @staticmethod def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> v5.FileHeader: - comp_size = decomp_size if comp_size is None else comp_size - return v5.FileHeader(Ptr(name_offset), Ptr(data_offset, comp_size), decomp_size, comp_size, 0, 0) + raise TypeError("Not currently supported") + # comp_size = decomp_size if comp_size is None else comp_size + # return v5.FileHeader(Ptr(name_offset), Ptr(data_offset, comp_size), decomp_size, comp_size, 0, 0) @staticmethod def gen_file_header_buffer(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> bytes: @@ -263,53 +310,55 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi @classmethod def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes, toc_pos: int = 180) -> v5.Archive: - def dirty_toc_hack(): - name_buf, name_offsets = cls.gen_name_buffer(folder, file) - vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) - folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) - file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) - toc_buf, toc_offsets = 
cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) - toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) - return cls.gen_toc_ptr_buffer(*toc_ptrs) + toc_buf - - full_toc = dirty_toc_hack() - - def dirty_csum_hack(): - EIGENS = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) - - def gen_csum(buffer: bytes, eigen: bytes) -> bytes: - hasher = hashlib.md5(eigen) - hasher.update(buffer) - return bytes.fromhex(hasher.hexdigest()) - - csum2 = gen_csum(full_toc, EIGENS[1]) - toc_and_data = full_toc + file_uncomp_data - csum1 = gen_csum(toc_and_data, EIGENS[0]) - return csum1, csum2 - - csums = dirty_csum_hack() - - _, name_offsets = cls.gen_name_buffer(folder, file) - vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) - folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) - file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = FileABC(file_h, file, file_uncomp_data, True) - folder_ = FolderABC(folder_h, folder, [], [file_]) - vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) - folder_.parent_drive = file_.parent_drive = vdrive_ - file_.parent_folder = folder_ - header = cls.gen_archive_header(archive_name, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), cls.ARCHIVE_HEADER_SIZE, csums) - return v5.Archive(header, [vdrive_], False) + raise TypeError("Not currently supported") + # def dirty_toc_hack(): + # name_buf, name_offsets = cls.gen_name_buffer(folder, file) + # vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) + # folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) + # file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) + # toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) + # toc_ptrs = splice_toc_offsets(1, 1, 1, 2, toc_offsets) + # return cls.gen_toc_ptr_buffer(*toc_ptrs) + toc_buf + # + # full_toc = dirty_toc_hack() + # + # def dirty_csum_hack(): + # EIGENS = ("E01519D6-2DB7-4640-AF54-0A23319C56C3".encode("ascii"), "DFC9AF62-FC1B-4180-BC27-11CCE87D3EFF".encode("ascii")) + # + # def gen_csum(buffer: bytes, eigen: bytes) -> bytes: + # hasher = hashlib.md5(eigen) + # hasher.update(buffer) + # return bytes.fromhex(hasher.hexdigest()) + # + # csum2 = gen_csum(full_toc, EIGENS[1]) + # toc_and_data = full_toc + file_uncomp_data + # csum1 = gen_csum(toc_and_data, EIGENS[0]) + # return csum1, csum2 + # + # csums = dirty_csum_hack() + # + # _, name_offsets = cls.gen_name_buffer(folder, file) + # vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) + # folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) + # file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) + # file_ = FileABC(file_h, file, file_uncomp_data, True) + # folder_ = FolderABC(folder_h, folder, [], [file_]) + # vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) + # folder_.parent_drive = file_.parent_drive = vdrive_ + # file_.parent_folder = folder_ + # header = cls.gen_archive_header(archive_name, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), cls.ARCHIVE_HEADER_SIZE, csums) + # return v5.Archive(header, [vdrive_], False) class DowIII: - VDRIVE_UNK = bytes.fromhex("dead") # Arbitrary value + DEF_ROOT_FOLDER = ushort(0) # bytes.fromhex("dead") # Arbitrary value ARCHIVE_HEADER_SIZE = 428 ARCHIVE_HEADER_UNK = b"dead " * 51 + b"\0" # 256 bytes spamming `dead ` in ascii; with one byte '\0' to pad to 256 
@classmethod def gen_archive_header(cls, name: str, toc_offset: int, toc_size: int, data_offset: int, data_size: int) -> ArchiveHeader: - return v9.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset, data_size), cls.ARCHIVE_HEADER_UNK) + raise TypeError("Not currently supported") + # return v9.ArchiveHeader(name, WindowPtr(toc_offset, toc_size), WindowPtr(data_offset, data_size), cls.ARCHIVE_HEADER_UNK) @classmethod def gen_archive_header_buffer(cls, name: str, toc_offset: int, toc_size: int, data_offset: int, data_size: int, magic: bytes = b"_ARCHIVE") -> bytes: @@ -322,16 +371,18 @@ def gen_archive_header_buffer(cls, name: str, toc_offset: int, toc_size: int, da return magic + version + encoded_name + encoded_toc_offset + encoded_toc_size + encoded_data_offset + encoded_data_size + uint(0) + uint(1) + cls.ARCHIVE_HEADER_UNK @staticmethod - def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK) -> v9.VirtualDriveHeader: - return v9.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) + def gen_vdrive_header(archive_name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = DEF_ROOT_FOLDER) -> v9.VirtualDriveHeader: + raise TypeError("Not currently supported") + # return v9.VirtualDriveHeader(path, archive_name, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count), unk) @staticmethod - def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = VDRIVE_UNK): + def gen_vdrive_header_buffer(name: str, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0, path: str = "data", unk: bytes = DEF_ROOT_FOLDER): return encode_and_pad(path, 64, "ascii") + encode_and_pad(name, 64, "ascii") + uint(subfolder_offset) + uint(subfolder_offset + subfolder_count) + uint(file_offset) + uint(file_count + file_offset) + unk @staticmethod def gen_folder_header(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> v9.FolderHeader: - return v9.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) + raise TypeError("Not currently supported") + # return v9.FolderHeader(name_offset, ArchiveRange(subfolder_offset, subfolder_offset + subfolder_count), ArchiveRange(file_offset, file_offset + file_count)) @staticmethod def gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfolder_count: int = 0, file_offset: int = 0, file_count: int = 0) -> bytes: @@ -341,7 +392,8 @@ def gen_folder_header_buffer(name_offset: int, subfolder_offset: int = 0, subfol def gen_file_header(name_offset: int, data_offset: int, decomp_size: int, comp_size: int = None) -> v9.FileHeader: if comp_size is None: comp_size = decomp_size - return v9.FileHeader(Ptr(name_offset), Ptr(data_offset), decomp_size, comp_size, 0, 0, 0, 0, 0) + raise TypeError("Not currently supported") + # return v9.FileHeader(Ptr(name_offset), Ptr(data_offset), decomp_size, comp_size, 0, 0, 0, 0, 0) @staticmethod def gen_file_header_buffer(name_offset: 
int, data_offset: int, decomp_size: int, comp_size: int = None) -> bytes: @@ -369,7 +421,8 @@ def gen_toc_ptr_buffer(vdrive: Tuple[int, int], folders: Tuple[int, int], files: @staticmethod def gen_toc(vdrive: VirtualDriveABC, folder: FolderABC, file: FileABC, names: Dict[int, str]) -> ArchiveTOC: - return ArchiveTOC([vdrive], [folder], [file], names) + raise TypeError("Not currently supported") + # return ArchiveTOC([vdrive], [folder], [file], names) @classmethod def gen_archive_buffer(cls, archive_name: str, toc_ptrs: bytes, toc: bytes, data: bytes, magic: bytes = "_ARCHIVE") -> bytes: @@ -391,23 +444,24 @@ def gen_sample_archive_buffer(cls, archive_name: str, folder: str, file: str, fi @classmethod def gen_sample_archive(cls, archive_name: str, folder: str, file: str, file_uncomp_data: bytes) -> v9.Archive: - name_buf, name_offsets = cls.gen_name_buffer(folder, file) - vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) - folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) - file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) - file_ = FileABC(file_h, file, file_uncomp_data, True) - folder_ = FolderABC(folder_h, folder, [], [file_]) - vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) - folder_.parent_drive = file_.parent_drive = vdrive_ - file_.parent_folder = folder_ - - vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) - folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) - file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) - toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) - toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! - toc_ptr_buf = cls.gen_toc_ptr_buffer(*toc_ptrs) - full_toc = toc_ptr_buf + toc_buf - - header = cls.gen_archive_header(archive_name, cls.ARCHIVE_HEADER_SIZE, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), len(file_uncomp_data)) - return v9.Archive(header, [vdrive_], False) + raise TypeError("Not currently supported") + # name_buf, name_offsets = cls.gen_name_buffer(folder, file) + # vdrive_h = cls.gen_vdrive_header(archive_name, 0, 1, 0, 1) + # folder_h = cls.gen_folder_header(name_offsets[folder], 0, 0, 0, 1) + # file_h = cls.gen_file_header(name_offsets[file], 0, len(file_uncomp_data)) + # file_ = FileABC(file_h, file, file_uncomp_data, True) + # folder_ = FolderABC(folder_h, folder, [], [file_]) + # vdrive_ = VirtualDriveABC(vdrive_h, [folder_], [file_]) + # folder_.parent_drive = file_.parent_drive = vdrive_ + # file_.parent_folder = folder_ + # + # vdrive_buf = cls.gen_vdrive_header_buffer(archive_name, 0, 1, 0, 1) + # folder_buf = cls.gen_folder_header_buffer(name_offsets[folder], 0, 0, 0, 1) + # file_buf = cls.gen_file_header_buffer(name_offsets[file], 0, len(file_uncomp_data)) + # toc_buf, toc_offsets = cls.gen_toc_buffer_and_offsets(vdrive_buf, folder_buf, file_buf, name_buf) + # toc_ptrs = splice_toc_offsets(1, 1, 1, len(name_buf), toc_offsets) # WE NEED TO USE BYTE-SIZE of NAME BUFFER!!!! 
+ # toc_ptr_buf = cls.gen_toc_ptr_buffer(*toc_ptrs) + # full_toc = toc_ptr_buf + toc_buf + # + # header = cls.gen_archive_header(archive_name, cls.ARCHIVE_HEADER_SIZE, len(full_toc), cls.ARCHIVE_HEADER_SIZE + len(full_toc), len(file_uncomp_data)) + # return v9.Archive(header, [vdrive_], False) diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py index eb1cd56..579fcf8 100644 --- a/tests/relic/sga/test_vX_interface.py +++ b/tests/relic/sga/test_vX_interface.py @@ -1,41 +1,41 @@ -from typing import Iterable, List, Tuple - -from relic.sga import ov2, v5, v7, v9, vX, apis -import pytest - -MODULES = [v2, v5,v7, v9] -ATTRS = vX.required_attrs -APIS = io.APIS.values() - - -def _permutate(*items: List): - def inner_permutate(subset: List, remaining: Tuple[List]) -> Iterable: - for item in subset: - if len(remaining) > 1: - for sub_items in inner_permutate(remaining[0], remaining[1:]): - yield item, *sub_items # Not possiblie in 3.7-, but we target 3.9+ - else: - for sub_item in remaining[0]: - yield item, sub_item - - if len(items) == 0: - return [] - elif len(items) == 1: - return items[0] - else: - return inner_permutate(items[0], items[1:]) - - -@pytest.mark.parametrize(["module"], [(m,) for m in MODULES]) -def test_module_is_vX_api(module): - assert vX.is_module_api(module) - - -@pytest.mark.parametrize(["module", "attr"], _permutate(MODULES, ATTRS)) -def test_module_has_required_vX_attr(module, attr: str): - assert hasattr(module, attr) - - -@pytest.mark.parametrize(["api", "attr"], _permutate(APIS, ATTRS)) -def test_api_has_required_vX_attr(api, attr: str): - assert hasattr(api, attr) +# from typing import Iterable, List, Tuple +# +# from relic.sga import ov2, v5, v7, v9, vX, apis +# import pytest +# +# MODULES = [v2, v5,v7, v9] +# ATTRS = vX.required_attrs +# APIS = io.APIS.values() +# +# +# def _permutate(*items: List): +# def inner_permutate(subset: List, remaining: Tuple[List]) -> Iterable: +# for item in subset: +# if len(remaining) > 1: +# for sub_items in inner_permutate(remaining[0], remaining[1:]): +# yield item, *sub_items # Not possiblie in 3.7-, but we target 3.9+ +# else: +# for sub_item in remaining[0]: +# yield item, sub_item +# +# if len(items) == 0: +# return [] +# elif len(items) == 1: +# return items[0] +# else: +# return inner_permutate(items[0], items[1:]) +# +# +# @pytest.mark.parametrize(["module"], [(m,) for m in MODULES]) +# def test_module_is_vX_api(module): +# assert vX.is_module_api(module) +# +# +# @pytest.mark.parametrize(["module", "attr"], _permutate(MODULES, ATTRS)) +# def test_module_has_required_vX_attr(module, attr: str): +# assert hasattr(module, attr) +# +# +# @pytest.mark.parametrize(["api", "attr"], _permutate(APIS, ATTRS)) +# def test_api_has_required_vX_attr(api, attr: str): +# assert hasattr(api, attr) From 186a1eb6eb688999c2f23931f4590bc204924b83 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sun, 12 Jun 2022 01:05:27 -0800 Subject: [PATCH 12/19] rename error.py to errors.py --- src/relic/sga/_abc.py | 2 +- src/relic/sga/{error.py => errors.py} | 0 src/relic/sga/v2/_serializers.py | 2 +- src/relic/sga/v2/core.py | 2 +- src/relic/sga/v5/_serializers.py | 2 +- src/relic/sga/v5/core.py | 2 +- src/relic/sga/v7/_serializers.py | 2 +- src/relic/sga/v7/core.py | 2 +- src/relic/sga/v9/_serializers.py | 2 +- src/relic/sga/v9/core.py | 2 +- src/scripts/universal/sga/common.py | 2 +- src/scripts/universal/sga/unpack.py | 2 +- 12 files changed, 11 insertions(+), 11 deletions(-) rename src/relic/sga/{error.py => 
errors.py} (100%) diff --git a/src/relic/sga/_abc.py b/src/relic/sga/_abc.py index 4f7d17d..5c1f1d1 100644 --- a/src/relic/sga/_abc.py +++ b/src/relic/sga/_abc.py @@ -11,7 +11,7 @@ from relic.sga import protocols as p from relic.sga.protocols import TFileMetadata, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer from relic.sga._core import StorageType -from relic.sga.error import Version +from relic.sga.errors import Version def _build_io_path(name: str, parent: Optional[p.IONode]) -> PurePath: diff --git a/src/relic/sga/error.py b/src/relic/sga/errors.py similarity index 100% rename from src/relic/sga/error.py rename to src/relic/sga/errors.py diff --git a/src/relic/sga/v2/_serializers.py b/src/relic/sga/v2/_serializers.py index 89e2d99..bc9926a 100644 --- a/src/relic/sga/v2/_serializers.py +++ b/src/relic/sga/v2/_serializers.py @@ -6,7 +6,7 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import FileDefABC as FileDef, Archive -from relic.sga.error import VersionMismatchError +from relic.sga.errors import VersionMismatchError from relic.sga.v2 import core from relic.sga._core import MagicWord, Version, StorageType from relic.sga.protocols import StreamSerializer diff --git a/src/relic/sga/v2/core.py b/src/relic/sga/v2/core.py index 69834e3..eb4f8d8 100644 --- a/src/relic/sga/v2/core.py +++ b/src/relic/sga/v2/core.py @@ -4,7 +4,7 @@ from typing import Optional, BinaryIO from relic.sga import _abc -from relic.sga.error import Version +from relic.sga.errors import Version version = Version(2) diff --git a/src/relic/sga/v5/_serializers.py b/src/relic/sga/v5/_serializers.py index 9878c14..5825f05 100644 --- a/src/relic/sga/v5/_serializers.py +++ b/src/relic/sga/v5/_serializers.py @@ -7,7 +7,7 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import Archive -from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.errors import MismatchError, VersionMismatchError from relic.sga.protocols import StreamSerializer from relic.sga._core import StorageType, VerificationType, MagicWord, Version from relic.sga.v5 import core diff --git a/src/relic/sga/v5/core.py b/src/relic/sga/v5/core.py index f9dccef..803a337 100644 --- a/src/relic/sga/v5/core.py +++ b/src/relic/sga/v5/core.py @@ -6,7 +6,7 @@ from relic.sga import _abc from relic.sga._abc import FileDefABC -from relic.sga.error import Version +from relic.sga.errors import Version from relic.sga._core import VerificationType version = Version(5) diff --git a/src/relic/sga/v7/_serializers.py b/src/relic/sga/v7/_serializers.py index a8dc748..295608e 100644 --- a/src/relic/sga/v7/_serializers.py +++ b/src/relic/sga/v7/_serializers.py @@ -7,7 +7,7 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import Archive -from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.errors import MismatchError, VersionMismatchError from relic.sga.protocols import StreamSerializer from relic.sga._core import StorageType, VerificationType, MagicWord, Version from relic.sga.v7 import core diff --git a/src/relic/sga/v7/core.py b/src/relic/sga/v7/core.py index a979c3f..1f11f15 100644 --- a/src/relic/sga/v7/core.py +++ b/src/relic/sga/v7/core.py @@ -5,7 +5,7 @@ from relic.sga import _abc from relic.sga._abc import FileDefABC -from relic.sga.error import Version +from relic.sga.errors import Version from relic.sga._core import VerificationType version = Version(7) diff --git a/src/relic/sga/v9/_serializers.py b/src/relic/sga/v9/_serializers.py 
index 35baf37..1bfe5b0 100644 --- a/src/relic/sga/v9/_serializers.py +++ b/src/relic/sga/v9/_serializers.py @@ -7,7 +7,7 @@ from relic.sga import _abc, _serializers as _s from relic.sga._abc import Archive -from relic.sga.error import MismatchError, VersionMismatchError +from relic.sga.errors import MismatchError, VersionMismatchError from relic.sga.protocols import StreamSerializer from relic.sga._core import StorageType, VerificationType, Version, MagicWord from relic.sga.v9 import core diff --git a/src/relic/sga/v9/core.py b/src/relic/sga/v9/core.py index 9af0a5a..9c87b9d 100644 --- a/src/relic/sga/v9/core.py +++ b/src/relic/sga/v9/core.py @@ -6,7 +6,7 @@ from relic.sga import _abc from relic.sga._abc import FileDefABC -from relic.sga.error import Version +from relic.sga.errors import Version from relic.sga._core import VerificationType version = Version(9) diff --git a/src/scripts/universal/sga/common.py b/src/scripts/universal/sga/common.py index 887582a..bac6b57 100644 --- a/src/scripts/universal/sga/common.py +++ b/src/scripts/universal/sga/common.py @@ -5,7 +5,7 @@ from serialization_tools.walkutil import blacklisted -from relic.sga.error import MagicWord +from relic.sga.errors import MagicWord from scripts.universal.common import print_error, print_wrote, print_reading, PrintOptions, SharedExtractorParser SharedSgaParser = argparse.ArgumentParser(parents=[SharedExtractorParser], add_help=False) diff --git a/src/scripts/universal/sga/unpack.py b/src/scripts/universal/sga/unpack.py index 1faf176..1cb31e0 100644 --- a/src/scripts/universal/sga/unpack.py +++ b/src/scripts/universal/sga/unpack.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Dict -from relic.sga.error import FileABC +from relic.sga.errors import FileABC from relic.sga.apis import read_archive from scripts.universal.common import PrintOptions, print_error, print_any, SharedExtractorParser from scripts.universal.sga.common import get_runner From 1ff11fe9b9b1d96579e6c9dc55a06080c5cab687 Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sun, 12 Jun 2022 01:54:31 -0800 Subject: [PATCH 13/19] Clean up old tests Most of them will be rewritten completely. --- tests/relic/sga/archive/test_archive.py | 210 ------------------ .../relic/sga/archive/test_archive_header.py | 203 ----------------- tests/relic/sga/file/test_file_header.py | 67 ------ tests/relic/sga/test_apis.py | 127 +++++++++++ tests/relic/sga/test_sga.py | 52 ----- tests/relic/sga/test_vX_interface.py | 41 ---- tests/relic/sga/write_sga_samples.py | 103 --------- tests/relic_chunky/test_relic_chunky.py | 74 ------ tests/relic_chunky/write_chunky_samples.py | 24 -- 9 files changed, 127 insertions(+), 774 deletions(-) delete mode 100644 tests/relic/sga/archive/test_archive.py delete mode 100644 tests/relic/sga/archive/test_archive_header.py delete mode 100644 tests/relic/sga/file/test_file_header.py create mode 100644 tests/relic/sga/test_apis.py delete mode 100644 tests/relic/sga/test_sga.py delete mode 100644 tests/relic/sga/test_vX_interface.py delete mode 100644 tests/relic/sga/write_sga_samples.py delete mode 100644 tests/relic_chunky/test_relic_chunky.py delete mode 100644 tests/relic_chunky/write_chunky_samples.py diff --git a/tests/relic/sga/archive/test_archive.py b/tests/relic/sga/archive/test_archive.py deleted file mode 100644 index 65c97e1..0000000 --- a/tests/relic/sga/archive/test_archive.py +++ /dev/null @@ -1,210 +0,0 @@ -import json -from abc import abstractmethod -from io import BytesIO -from pathlib import Path -from typing
import Union, Iterable, Tuple - -import pytest -import serialization_tools.magic - -from relic.sga import v2, v5, v9, MagicWord, Version, v7 -from relic.sga.protocols import API -from tests.relic.sga.datagen import DowII, DowI, DowIII - - -# def _ARCHIVE_WALK_SAMPLE(a: Archive) -> ArchiveWalk: -# d = a.drives[0] -# sfs = d.folders -# dfs = d.files -# yield d, None, sfs, dfs -# yield d, sfs[0], [], sfs[0].files - - -class APITests: - @abstractmethod - def test_read(self, buffer: Union[bytes, str], api: API): - if isinstance(buffer, str): - with open(buffer, "rb") as stream: - api.read(stream, True) - else: - with BytesIO(buffer) as stream: - api.read(stream, True) - # def assert_equal(self, expected: Archive, result: Archive, sparse: bool): - # assert expected.meta == result.meta - # if sparse: - # assert result._sparse - # # TODO - - # @abstractmethod - # def test_walk(self, archive: Archive, expected: ArchiveWalk): - # archive_walk = archive.walk() - # for (a_vdrive, a_folder, a_folders, a_files), (e_vdrive, e_folder, e_folders, e_files) in zip(archive_walk, expected): - # assert a_vdrive == e_vdrive - # assert a_folder == e_folder - # assert a_folders == e_folders - # assert a_files == e_files - - # @abstractmethod - # def test_unpack(self, stream_data: bytes, expected: Archive): - # for sparse in TF: - # with BytesIO(stream_data) as stream: - # archive = expected.__class__.unpack(stream, expected.header, sparse) - # assert expected.__class__ == archive.__class__ - # self.assert_equal(expected, archive, sparse) - # - # @abstractmethod - # def test_pack(self, archive: Archive, expected: bytes): - # for write_magic in TF: - # try: - # with BytesIO() as stream: - # packed = archive.pack(stream, write_magic) - # except NotImplementedError: - # pass # Currently not implemented; we'll expect this for now - # else: - # assert expected == packed - - -def scan_directory(root_dir: str, desired_version: Version) -> Iterable[str]: - root_directory = Path(root_dir) - for path_object in root_directory.glob('**/*.sga'): - with path_object.open("rb") as stream: - if not MagicWord.check_magic_word(stream, advance=True): - continue - version = Version.unpack(stream) - if version != desired_version: - continue - yield str(path_object) - - -def fast_gen_dow1_archive(*args): - return None, DowI.gen_sample_archive_buffer(*args) - # return DowI.gen_sample_archive(*args),\ - # DowI.gen_sample_archive_buffer(*args) - - -def buffer_paramefy(files: Iterable[str]) -> Iterable[Tuple[str]]: - return [(_,) for _ in files] - - -try: - path = Path(__file__) - path = path.parent / "file_sources.json" - with path.open() as stream: - file_sources = json.load(stream) -except IOError as e: - file_sources = {} - - -def _helper(src_key: str, version: Version): - try: - return buffer_paramefy(scan_directory(file_sources[src_key], version)) - except: - return tuple() - - -v2Files = _helper("v2", v2.version) -v5Files = _helper("v5", v5.version) -v7Files = _helper("v7", v7.version) -v9Files = _helper("v9", v9.version) - -DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") - - -class TestV2(APITests): - @pytest.fixture() - def api(self) -> API: - return v2.API - - @pytest.mark.parametrize(["buffer"], [(DOW1_ARCHIVE_PACKED,), *v2Files]) - def test_read(self, buffer: Union[bytes, str], api: API): - super(TestV2, self).test_read(buffer, api) - # @pytest.mark.parametrize(["stream_data", "expected"], - # 
[(DOW1_ARCHIVE_PACKED, DOW1_ARCHIVE)]) - # def test_unpack(self, stream_data: bytes, expected: Archive): - # super().test_unpack(stream_data, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED)]) - # def test_pack(self, archive: Archive, expected: bytes): - # super().test_pack(archive, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW1_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW1_ARCHIVE))]) - # def test_walk(self, archive: Archive, expected: ArchiveWalk): - # super().test_walk(archive, expected) - - -def fast_gen_dow2_archive(*args): - return None, DowII.gen_sample_archive_buffer(*args) - # return DowII.gen_sample_archive(*args),\ - # DowII.gen_sample_archive_buffer(*args) - - -DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") - - -class TestV5(APITests): - @pytest.fixture() - def api(self) -> API: - return v5.API - - @pytest.mark.parametrize(["buffer"], [*v5Files, (DOW2_ARCHIVE_PACKED,)]) - def test_read(self, buffer: Union[bytes, str], api: API): - super(TestV5, self).test_read(buffer, api) - # @pytest.mark.parametrize(["stream_data", "expected"], - # [(DOW2_ARCHIVE_PACKED, DOW2_ARCHIVE)]) - # def test_unpack(self, stream_data: bytes, expected: Archive): - # super().test_unpack(stream_data, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED)]) - # def test_pack(self, archive: Archive, expected: bytes): - # super().test_pack(archive, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW2_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW2_ARCHIVE))]) - # def test_walk(self, archive: Archive, expected: ArchiveWalk): - # super().test_walk(archive, expected) - - -def fast_gen_dow3_archive(*args): - return None, DowIII.gen_sample_archive_buffer(*args) - # return DowIII.gen_sample_archive(*args), \ - # DowIII.gen_sample_archive_buffer(*args) - - -DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED = fast_gen_dow3_archive("Dow3 Test Archive", "Tests", "Some Witty FileName.txt", b"NGL; I'm running out of dumb/clever test data.") - - -class TestV9(APITests): - @pytest.fixture() - def api(self) -> API: - return v9.API - - @pytest.mark.parametrize(["buffer"], [*v9Files, (DOW3_ARCHIVE_PACKED,)]) - def test_read(self, buffer: Union[bytes, str], api: API): - super(TestV9, self).test_read(buffer, api) - # @pytest.mark.parametrize(["stream_data", "expected"], - # [(DOW3_ARCHIVE_PACKED, DOW3_ARCHIVE)]) - # def test_unpack(self, stream_data: bytes, expected: Archive): - # super().test_unpack(stream_data, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED)]) - # def test_pack(self, archive: Archive, expected: bytes): - # super().test_pack(archive, expected) - # - # @pytest.mark.parametrize(["archive", "expected"], - # [(DOW3_ARCHIVE, _ARCHIVE_WALK_SAMPLE(DOW3_ARCHIVE))]) - # def test_walk(self, archive: Archive, expected: ArchiveWalk): - # super().test_walk(archive, expected) - - -class TestV7(APITests): - @pytest.fixture() - def api(self) -> API: - return v7.API - - @pytest.mark.parametrize(["buffer"], [*v7Files]) - def test_read(self, buffer: Union[bytes, str], api: API): - super(TestV7, self).test_read(buffer, api) diff --git a/tests/relic/sga/archive/test_archive_header.py b/tests/relic/sga/archive/test_archive_header.py deleted file mode 100644 index e21d536..0000000 --- a/tests/relic/sga/archive/test_archive_header.py +++ 
/dev/null @@ -1,203 +0,0 @@ -# from abc import abstractmethod -# from io import BytesIO -# from typing import List, Type -# -# import pytest -# from serialization_tools.ioutil import WindowPtr, Ptr -# from serialization_tools.size import KiB, MiB, GiB -# -# from relic.common import Version -# from relic.sga_old import protocols as proto, v2, v5, v9 -# from relic.sga_old.checksums import gen_md5_checksum, validate_md5_checksum -# from relic.sga_old.common import ArchiveVersion -# from tests.helpers import TF -# from tests.relic.sga.datagen import DowI, DowII, DowIII -# -# -# class ArchiveHeaderTests: -# @abstractmethod # Trick PyCharm into requiring us to redefine this -# def test_validate_checksums(self, archive: bytes, cls: Type[proto.ArchiveHeader]): -# for fast in TF: -# for _assert in TF: -# with BytesIO(archive) as stream: -# stream.seek(12) # skip magic/version -# archive_header = cls.unpack(stream) -# archive_header.validate_checksums(stream, fast=fast, _assert=_assert) -# -# @abstractmethod # Trick PyCharm into requiring us to redefine this -# def test_version(self, archive: proto.ArchiveHeader, expected: Version): -# assert archive.version == expected -# -# @abstractmethod -# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): -# with BytesIO(buffer) as stream: -# unpacked = expected.__class__.unpack(stream) -# assert expected == unpacked -# -# @abstractmethod -# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): -# with BytesIO() as stream: -# written = inst.pack(stream) -# stream.seek(0) -# packed = stream.read() -# assert len(packed) == written -# assert expected == packed -# -# -# _KNOWN_EIGEN = b'06BEF126-4E3C-48D3-8D2E-430BF125B54F' -# _KNOWN_DATA = b'\xf3\x0cGjx:"\xb7O\x89\xc1\x82H\xb2\xa1\xaa\x82-\xe4\\{\xe2\x905\x0c\xdbT\x0c\x82\xa3y\xdat\xd5\xdf\xb7\x04\x1e\xd0\xaa\xf6\xc9|U%\xf7\x0c\xb9\x92\xc9\xbf\xa9\xa3\xaaQ]\xb6\x8c\x10\x87\xc3r\xe3\x89\x16T\x936\xc5l/(\xbd\xbc\x08\xa2\x9b`|\xec\xd5\xf3\xfd\x83\x85\xadHY\xf4U\xb8\x85\x92\xcd\x1d\xc1\xa2\x0f\xbam!\xd5\xacnft>\'\xf0\x12\x9c\x0c\x1c{\xa2\x15VI\xb0\x13\x89\xde\x889\xdc\x15_\xc8\\\x97\x06\xa7\xde\xc0p\xf9o\t\xd3_\x9d\xa7@.\x81\xed\xdd\x13\x9b m9\xf5\x1bV\xc3\xe0\xd4@\x99\xa2\x8aGr\x04\xff\x05\xedIs\x15\t0\x98G\x87O\x9c\xa1\xd2\tcS\xb3\x1eI\xf5\xe3Qp\xe0\xd0m\xbf;\xfb\x856\xa7\\\xb8\xad\x19\xc1\xa3\xaf+\xd4\x08\xd5Y4\x87p|p`dQ\x1c|>is\x17;\xa6\x8d\xa2\xa4\xdc\xe0\xd6\xaf\xc3\x93\xf59\x9a[\x19J\xc88\xb8\xfd/\xe4\xc6J\x8c\xddCY&\x8f' -# _KNOWN_BAD_DATA = b'\xe9F{\x17\xc2\x118\xe4\x0c\xbd\x07\xf2\x07\x03:\xee%\xabx<\xc3\xb5\x98\x7f\xa6[\xc53+Y]t' -# _KNOWN_DATA_MD5 = b'\x0f\xd3\xc3|\xb2d\x16U\xfd\xc2<\x98\x0b\xf1\x91\xde' -# -# -# @pytest.mark.parametrize( -# ["stream_data", "eigen", "md5_checksum"], -# [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5)] -# ) -# def test_gen_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, buffer_sizes: List[int] = None, ptr: Ptr = None): -# buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes -# ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr -# for buffer_size in buffer_sizes: -# with BytesIO(stream_data) as stream: -# result = gen_md5_checksum(stream, eigen, buffer_size, ptr) -# assert md5_checksum == result -# -# -# @pytest.mark.parametrize( -# ["stream_data", "eigen", "md5_checksum", "fail_expected"], -# [(_KNOWN_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, False), -# (_KNOWN_BAD_DATA, _KNOWN_EIGEN, _KNOWN_DATA_MD5, True)] -# ) -# def test_validate_md5_checksum(stream_data: bytes, eigen: bytes, md5_checksum: bytes, 
fail_expected: bool, ptr: WindowPtr = None, buffer_sizes: List[int] = None, ): -# buffer_sizes = [KiB, MiB, GiB] if buffer_sizes is None else buffer_sizes -# ptr = WindowPtr(0, len(stream_data)) if ptr is None else ptr -# for _assert in TF: -# for buffer_size in buffer_sizes: -# try: -# with BytesIO(stream_data) as stream: -# result = validate_md5_checksum(stream, ptr, eigen, md5_checksum, buffer_size, _assert) -# # Own lines to make assertions clearer -# except AssertionError as e: -# if not fail_expected: # MD5 mismatch; if fail_expected we -# raise e -# else: -# if fail_expected: -# # Invalid and should have asserted -# assert not result and not _assert -# else: -# assert result -# -# -# # Not garunteed to be a valid header -# -# def fast_dow1_archive_header(name, toc_pos, bad_magic: bytes): -# _AB = 0, 120 # Random values -# return DowI.gen_archive_header(name, *_AB, toc_pos=toc_pos), DowI.gen_archive_header_buffer(name, *_AB), DowI.gen_archive_header_buffer(name, *_AB, magic=bad_magic) -# -# -# DOW1_HEADER, DOW1_HEADER_DATA, DOW1_HEADER_DATA_BAD_MAGIC = fast_dow1_archive_header("Dawn Of War 1 Test Header", 180, b"deadbeef") -# # By not writing Magic/Archive TOC-Pos must be changed in the generated DowIIArchiveHeader; the buffers (should be) identical given the same input -# DOW1_HEADER_INNER, DOW1_HEADER_INNER_DATA, _ = fast_dow1_archive_header("Dawn Of War 1 Test Header (Inner Pack)", 168, b"deaddead") -# DOW1_ARCHIVE_BUFFER = DowI.gen_sample_archive_buffer("Dawn Of War 1 Test Archive", "Tests", "Dow1 Header Tests.txt", b"You thought this was a test, but it was me, DIO!") -# -# HDR_START = 12 # Most logic now doesn't handle Magic + Version -# -# -# class TestDowIArchiveHeader(ArchiveHeaderTests): -# @pytest.mark.parametrize( -# ["archive", "cls"], -# [(DOW1_ARCHIVE_BUFFER, v2.ArchiveHeader)]) -# def test_validate_checksums(self, archive: bytes, cls: Type[v2.ArchiveHeader]): -# super().test_validate_checksums(archive, cls) -# -# @pytest.mark.parametrize( -# ["expected", "inst"], -# [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] -# ) -# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): -# super().test_pack(inst, expected) -# -# @pytest.mark.parametrize( -# ["buffer", "expected"], -# [(DOW1_HEADER_INNER_DATA[HDR_START:], DOW1_HEADER_INNER)] -# ) -# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): -# super().test_unpack(buffer, expected) -# -# @pytest.mark.parametrize(["archive", "expected"], [(DOW1_HEADER, v2.version)]) -# def test_version(self, archive: proto.ArchiveHeader, expected: Version): -# super().test_version(archive, expected) -# -# -# # Not garunteed to be a valid header -# -# -# def fast_dow2_archive_header(name): -# _ABC = 0, 0, 0 -# return DowII.gen_archive_header(name, *_ABC), DowII.gen_archive_header_buffer(name, *_ABC) -# -# -# DOW2_HEADER, DOW2_HEADER_DATA = fast_dow2_archive_header("Dawn Of War 2 Test Header") -# DOW2_ARCHIVE_BUFFER = DowII.gen_sample_archive_buffer("Dawn Of War 2 Test Archive", "Dow2 Tests", "Imperial Propoganda.txt", b"By the Emperor, we're ready to unleash eleven barrels, m' lord, sir!") -# -# -# class TestDowIIArchiveHeader(ArchiveHeaderTests): -# @pytest.mark.parametrize( -# ["buffer", "expected"], -# [(DOW2_HEADER_DATA[HDR_START:], DOW2_HEADER)], -# ) -# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): -# super().test_unpack(buffer, expected) -# -# @pytest.mark.parametrize( -# ["inst", "expected"], -# [(DOW2_HEADER, DOW2_HEADER_DATA[HDR_START:])]) -# def test_pack(self, inst: 
proto.ArchiveHeader, expected: bytes): -# super().test_pack(inst, expected) -# -# @pytest.mark.parametrize( -# ["archive", "cls"], -# [(DOW2_ARCHIVE_BUFFER, v5.ArchiveHeader)], -# ) -# def test_validate_checksums(self, archive: bytes, cls: Type[v5.ArchiveHeader]): -# super().test_validate_checksums(archive, cls) -# -# @pytest.mark.parametrize(["archive", "expected"], [(DOW2_HEADER, v5.version)]) -# def test_version(self, archive: proto.ArchiveHeader, expected: Version): -# super().test_version(archive, expected) -# -# -# def fast_dow3_archive_header(name, bad_magic: bytes): -# _ABCD = 0, 1, 2, 3 -# return DowIII.gen_archive_header(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD), DowIII.gen_archive_header_buffer(name, *_ABCD, magic=bad_magic) -# -# -# DOW3_HEADER, DOW3_HEADER_DATA, DOW3_HEADER_DATA_BAD_MAGIC = fast_dow3_archive_header("Dawn Of War 3 Test Header", b" Marine!") # Big Brain Pun in ` Marine!` -# -# -# class TestDowIIIArchiveHeader(ArchiveHeaderTests): -# @pytest.mark.parametrize( -# ["archive", "cls"], -# [(None, v9.ArchiveHeader)]) -# def test_validate_checksums(self, archive: bytes, cls: Type[v9.ArchiveHeader]): -# for fast in TF: -# for _assert in TF: -# # HACK but if it fails it means logic has changed -# assert cls.validate_checksums(None, None, fast=fast, _assert=_assert) -# -# @pytest.mark.parametrize( -# ["buffer", "expected"], -# [(DOW3_HEADER_DATA[HDR_START:], DOW3_HEADER)], -# ) -# def test_unpack(self, buffer: bytes, expected: proto.ArchiveHeader): -# super().test_unpack(buffer, expected) -# -# @pytest.mark.parametrize( -# ["inst", "expected"], -# [(DOW3_HEADER, DOW3_HEADER_DATA[HDR_START:])]) -# def test_pack(self, inst: proto.ArchiveHeader, expected: bytes): -# super().test_pack(inst, expected) -# -# @pytest.mark.parametrize(["archive", "expected"], [(DOW3_HEADER, v9.version)]) -# def test_version(self, archive: proto.ArchiveHeader, expected: Version): -# super().test_version(archive, expected) diff --git a/tests/relic/sga/file/test_file_header.py b/tests/relic/sga/file/test_file_header.py deleted file mode 100644 index fa60b2a..0000000 --- a/tests/relic/sga/file/test_file_header.py +++ /dev/null @@ -1,67 +0,0 @@ -# from abc import abstractmethod -# from io import BytesIO -# -# import pytest -# -# from relic.common import VersionLike -# from relic.sga_old.common import ArchiveVersion -# from relic.sga_old.abc_old_ import FileHeaderABC -# from tests.relic.sga.datagen import DowI, DowII, DowIII -# -# -# class FileHeaderTests: -# @abstractmethod -# def test_pack(self, header: FileHeaderABC, expected: bytes): -# with BytesIO() as stream: -# written = header.pack(stream) -# assert written == len(expected) -# stream.seek(0) -# assert stream.read() == expected -# -# @abstractmethod -# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): -# with BytesIO(data_stream) as stream: -# header = expected.__class__.unpack(stream) -# assert header == expected -# -# -# DOW1_HEADER, DOW1_HEADER_BUFFER = DowI.gen_file_header(0, 0, 0), DowI.gen_file_header_buffer(0, 0, 0) -# -# -# class TestDowIFileHeader(FileHeaderTests): -# @pytest.mark.parametrize(["header", "expected"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) -# def test_pack(self, header: FileHeaderABC, expected: bytes): -# super().test_pack(header, expected) -# -# -# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW1_HEADER, DOW1_HEADER_BUFFER)]) -# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): -# super().test_unpack(data_stream, expected) -# -# -# -# DOW2_HEADER, 
DOW2_HEADER_BUFFER = DowII.gen_file_header(0, 0, 0), DowII.gen_file_header_buffer(0, 0, 0) -# -# -# class TestDowIIFileHeader(FileHeaderTests): -# @pytest.mark.parametrize(["header", "expected"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) -# def test_pack(self, header: FileHeaderABC, expected: bytes): -# super().test_pack(header, expected) -# -# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW2_HEADER, DOW2_HEADER_BUFFER)]) -# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): -# super().test_unpack(data_stream, expected) -# -# -# -# DOW3_HEADER, DOW3_HEADER_BUFFER = DowIII.gen_file_header(0x0f, 0xf0, 0x09, 0x90), DowIII.gen_file_header_buffer(0x0f, 0xf0, 0x09, 0x90) -# -# -# class TestDowIIIFileHeader(FileHeaderTests): -# @pytest.mark.parametrize(["header", "expected"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) -# def test_pack(self, header: FileHeaderABC, expected: bytes): -# super().test_pack(header, expected) -# -# @pytest.mark.parametrize(["expected", "data_stream"], [(DOW3_HEADER, DOW3_HEADER_BUFFER)]) -# def test_unpack(self, data_stream: bytes, expected: FileHeaderABC): -# super().test_unpack(data_stream, expected) diff --git a/tests/relic/sga/test_apis.py b/tests/relic/sga/test_apis.py new file mode 100644 index 0000000..ec617b0 --- /dev/null +++ b/tests/relic/sga/test_apis.py @@ -0,0 +1,127 @@ +import json +from abc import abstractmethod +from io import BytesIO +from pathlib import Path +from typing import Union, Iterable, Tuple + +import pytest + +from relic.sga import v2, v5, v9, MagicWord, Version, v7 +from relic.sga.protocols import API +from tests.relic.sga.datagen import DowII, DowI, DowIII + + +class APITests: + @abstractmethod + def test_read(self, buffer: Union[bytes, str], api: API): + if isinstance(buffer, str): + with open(buffer, "rb") as stream: + api.read(stream, True) + else: + with BytesIO(buffer) as stream: + api.read(stream, True) + + +def scan_directory(root_dir: str, desired_version: Version) -> Iterable[str]: + root_directory = Path(root_dir) + for path_object in root_directory.glob('**/*.sga'): + with path_object.open("rb") as stream: + if not MagicWord.check_magic_word(stream, advance=True): + continue + version = Version.unpack(stream) + if version != desired_version: + continue + yield str(path_object) + + +def fast_gen_dow1_archive(*args): + return None, DowI.gen_sample_archive_buffer(*args) + + +def prepare_for_parametrize(files: Iterable[str]) -> Iterable[Tuple[str]]: + return [(_,) for _ in files] + + +try: + path = Path(__file__) + path = path.parent / "sources.json" + with path.open() as stream: + file_sources = json.load(stream) +except IOError as e: + file_sources = {} + + +def _helper(src_key: str, version: Version): + try: + local_sources = file_sources.get(src_key,{}) + files = set() + for src_dir in local_sources.get("dirs",[]): + for f in scan_directory(src_dir, version): + files.add(f) + for src_file in local_sources.get("files",[]): + files.add(src_file) + return prepare_for_parametrize(files) + except Exception as e: + return tuple() + + +v2Files = _helper("v2", v2.version) +v5Files = _helper("v5", v5.version) +v7Files = _helper("v7", v7.version) +v9Files = _helper("v9", v9.version) + +DOW1_ARCHIVE, DOW1_ARCHIVE_PACKED = fast_gen_dow1_archive("Dow1 Test Archive", "Tests", "And Now For Something Completely Different.txt", b"Just kidding, it's Monty Python.") + + +class TestV2(APITests): + @pytest.fixture() + def api(self) -> API: + return v2.API + + @pytest.mark.parametrize(["buffer"], [(DOW1_ARCHIVE_PACKED,), *v2Files]) + 
def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV2, self).test_read(buffer, api) + + +def fast_gen_dow2_archive(*args): + return None, DowII.gen_sample_archive_buffer(*args) + + +DOW2_ARCHIVE, DOW2_ARCHIVE_PACKED = fast_gen_dow2_archive("Dow2 Test Archive", "Tests", "A Favorite Guardsmen VL.txt", b"Where's that artillery!?") + + +class TestV5(APITests): + @pytest.fixture() + def api(self) -> API: + return v5.API + + @pytest.mark.parametrize(["buffer"], [*v5Files, (DOW2_ARCHIVE_PACKED,)]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV5, self).test_read(buffer, api) + + +def fast_gen_dow3_archive(*args): + return None, DowIII.gen_sample_archive_buffer(*args) + + +DOW3_ARCHIVE, DOW3_ARCHIVE_PACKED = fast_gen_dow3_archive("Dow3 Test Archive", "Tests", "Some Witty FileName.txt", b"NGL; I'm running out of dumb/clever test data.") + + +class TestV9(APITests): + @pytest.fixture() + def api(self) -> API: + return v9.API + + @pytest.mark.parametrize(["buffer"], [*v9Files, (DOW3_ARCHIVE_PACKED,)]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV9, self).test_read(buffer, api) + + +class TestV7(APITests): + @pytest.fixture() + def api(self) -> API: + return v7.API + + @pytest.mark.parametrize(["buffer"], [*v7Files]) + def test_read(self, buffer: Union[bytes, str], api: API): + super(TestV7, self).test_read(buffer, api) diff --git a/tests/relic/sga/test_sga.py b/tests/relic/sga/test_sga.py deleted file mode 100644 index 679a8e9..0000000 --- a/tests/relic/sga/test_sga.py +++ /dev/null @@ -1,52 +0,0 @@ -# from __future__ import annotations - -# from io import BytesIO -# from relic.sga import Archive -# from write_sga_samples import build_sample_dow1_archive, build_sample_dow3_archive, build_sample_dow2_archive -# -# -# def assert_archives(left: Archive, right: Archive): -# # ASSERT HEADER -# assert left.header == right.header, (left.header, right.header) -# v = left.header.version -# -# # # ASSERT TOC -# # l_toc, r_toc = left.info.table_of_contents, right.info.table_of_contents -# # assert l_toc.drive_info.count == r_toc.drive_info.count -# # assert l_toc.files_info.count == r_toc.files_info.count -# # if l_toc.filenames_info.count: -# # assert l_toc.filenames_info.count == r_toc.filenames_info.count -# # else: -# # assert l_toc.filenames_info.byte_size == r_toc.filenames_info.byte_size -# # assert l_toc.folders_info.count == r_toc.folders_info.count -# -# r_lookup = {d.path: d for d in right.drives} -# l_lookup = {d.path: d for d in left.drives} -# assert len(r_lookup) == len(l_lookup), "Drive Count" -# for key in l_lookup: -# assert key in r_lookup, "Drive Not Found" -# -# # TODO assert folders and files -# -# -# def run_test(archive: Archive): -# with BytesIO() as buffer: -# archive.pack(buffer, True) -# buffer.seek(0) -# gen_archive = Archive.unpack(buffer) -# assert_archives(archive, gen_archive) -# -# -# def test_archive_DowI(): -# archive = build_sample_dow1_archive() -# run_test(archive) -# -# -# def test_archive_Dow2(): -# archive = build_sample_dow2_archive() -# run_test(archive) -# -# -# def test_archive_Dow3(): -# archive = build_sample_dow3_archive() -# run_test(archive) diff --git a/tests/relic/sga/test_vX_interface.py b/tests/relic/sga/test_vX_interface.py deleted file mode 100644 index 579fcf8..0000000 --- a/tests/relic/sga/test_vX_interface.py +++ /dev/null @@ -1,41 +0,0 @@ -# from typing import Iterable, List, Tuple -# -# from relic.sga import ov2, v5, v7, v9, vX, apis -# import pytest -# -# MODULES = [v2, v5,v7, 
v9] -# ATTRS = vX.required_attrs -# APIS = io.APIS.values() -# -# -# def _permutate(*items: List): -# def inner_permutate(subset: List, remaining: Tuple[List]) -> Iterable: -# for item in subset: -# if len(remaining) > 1: -# for sub_items in inner_permutate(remaining[0], remaining[1:]): -# yield item, *sub_items # Not possiblie in 3.7-, but we target 3.9+ -# else: -# for sub_item in remaining[0]: -# yield item, sub_item -# -# if len(items) == 0: -# return [] -# elif len(items) == 1: -# return items[0] -# else: -# return inner_permutate(items[0], items[1:]) -# -# -# @pytest.mark.parametrize(["module"], [(m,) for m in MODULES]) -# def test_module_is_vX_api(module): -# assert vX.is_module_api(module) -# -# -# @pytest.mark.parametrize(["module", "attr"], _permutate(MODULES, ATTRS)) -# def test_module_has_required_vX_attr(module, attr: str): -# assert hasattr(module, attr) -# -# -# @pytest.mark.parametrize(["api", "attr"], _permutate(APIS, ATTRS)) -# def test_api_has_required_vX_attr(api, attr: str): -# assert hasattr(api, attr) diff --git a/tests/relic/sga/write_sga_samples.py b/tests/relic/sga/write_sga_samples.py deleted file mode 100644 index 73d6ac8..0000000 --- a/tests/relic/sga/write_sga_samples.py +++ /dev/null @@ -1,103 +0,0 @@ -# import zlib -# from io import BytesIO -# from os.path import join -# -# from archive_tools.structio import WindowPtr, Ptr -# -# from relic.sga.archive.header import DowIArchiveHeader -# from relic.sga.file.file import File -# from relic.sga.file.header import DowIFileHeader -# from relic.sga.folder.folder import Folder -# from relic.sga.vdrive.virtual_drive import VirtualDrive -# from tests.helpers import get_testdata_root_folder, lorem_ipsum -# -# -# def compress16(b: bytes) -> bytes: -# compressor = zlib.compressobj(wbits=14) -# with BytesIO() as stream: -# stream.write(compressor.compress(b)) -# stream.write(compressor.flush()) -# stream.seek(0) -# return stream.read() -# -# -# def compress32(b: bytes) -> bytes: -# compressor = zlib.compressobj(wbits=15) -# with BytesIO() as stream: -# stream.write(compressor.compress(b)) -# stream.write(compressor.flush()) -# stream.seek(0) -# return stream.read() -# -# -# def build_sample_dow1_archive(): -# header = DowIArchiveHeader("DowI Test Data", WindowPtr(None, None), Ptr(None), 0) -# -# raw_content = lorem_ipsum.encode("ascii") -# comp_16_content = compress16(raw_content) -# comp_32_content = compress32(raw_content) -# -# rf = DowIFileHeader(None,None,#, len(raw_content), len(raw_content), FileCompressionFlag.Decompressed) -# raw_file = File(, "Lorem Ipsum Raw", raw_content)) -# comp16_file = File(DowIFileHeader(None, None, len(raw_content), len(comp_16_content), FileCompressionFlag.Compressed16), "Lorem Ipsum Zlib-16", comp_16_content) -# comp32_file = File(DowIFileHeader(None, None, len(raw_content), len(comp_32_content), FileCompressionFlag.Compressed32), "Lorem Ipsum Zlib-32", comp_32_content) -# lorem_folder = Folder([], [raw_file, comp16_file, comp32_file], 0, 3, FolderHeader(None, None, None), "Lorem Ipsum") -# test_drive = VirtualDrive([lorem_folder], [], 1, 0, "test", "Test Drive", None) -# -# archive = Archive(info, [test_drive]) -# return archive -# -# # -# # def build_sample_dow2_archive(): -# # header = ArchiveHeader(SgaVersion.Dow2.value, "DowII Test Data", bytes([0x00] * 16), bytes([0x00] * 16)) -# # -# # info = ArchiveInfo(header, None, None) -# # raw_content = lorem_ipsum.encode("ascii") -# # comp_16_content = compress16(raw_content) -# # comp_32_content = compress32(raw_content) -# # -# # 
raw_file = File(DowIIFileHeader(None, None, len(raw_content), len(raw_content), 0, 0), "Lorem Ipsum Raw", raw_content) -# # comp16_file = File(DowIIFileHeader(None, None, len(raw_content), len(comp_16_content), 0, 0), "Lorem Ipsum Zlib-16", comp_16_content) -# # comp32_file = File(DowIIFileHeader(None, None, len(raw_content), len(comp_32_content), 0, 0), "Lorem Ipsum Zlib-32", comp_32_content) -# # lorem_folder = Folder([], [raw_file, comp16_file, comp32_file], 0, 3, FolderHeader(None, None, None), "Lorem Ipsum") -# # test_drive = VirtualDrive([lorem_folder], [], 1, 0, "test", "Test Drive", None) -# # -# # archive = Archive(info, [test_drive]) -# # return archive -# # -# # -# # def build_sample_dow3_archive(): -# # header = ArchiveHeader(SgaVersion.Dow3.value, "DowIII Test Data") -# # -# # info = ArchiveInfo(header, None, None) -# # raw_content = lorem_ipsum.encode("ascii") -# # comp_16_content = compress16(raw_content) -# # comp_32_content = compress32(raw_content) -# # -# # raw_file = File(DowIIIFileHeader(None, None, len(raw_content), len(raw_content), 0, 0, 0, 0, 0), -# # "Lorem Ipsum Raw", raw_content) -# # comp16_file = File(DowIIIFileHeader(None, None, len(raw_content), len(comp_16_content), 0, 0, 0, 0, 0), -# # "Lorem Ipsum Zlib-16", comp_16_content) -# # comp32_file = File(DowIIIFileHeader(None, None, len(raw_content), len(comp_32_content), 0, 0, 0, 0, 0), -# # "Lorem Ipsum Zlib-32", comp_32_content) -# # lorem_folder = Folder([], [raw_file, comp16_file, comp32_file], 0, 3, FolderHeader(None, None, None), "Lorem Ipsum") -# # test_drive = VirtualDrive([lorem_folder], [], 1, 0, "test", "Test Drive", None) -# # -# # archive = Archive(info, [test_drive]) -# # return archive -# -# -# if __name__ == "__main__": -# root = get_testdata_root_folder() -# archive = build_sample_dow1_archive() -# with open(join(root, "archive-v2_0.sga"), "wb") as file: -# archive.pack(file) -# # write_archive(file, archive) -# -# # archive2 = build_sample_dow2_archive() -# # with open(join(root, "archive-v5_0.sga"), "wb") as file: -# # write_archive(file, archive2) -# # -# # archive3 = build_sample_dow3_archive() -# # with open(join(root, "archive-v9_0.sga"), "wb") as file: -# # write_archive(file, archive3) diff --git a/tests/relic_chunky/test_relic_chunky.py b/tests/relic_chunky/test_relic_chunky.py deleted file mode 100644 index 948e153..0000000 --- a/tests/relic_chunky/test_relic_chunky.py +++ /dev/null @@ -1,74 +0,0 @@ -# from io import BytesIO -# -# from relic.chunky import RelicChunky, DataChunk, FolderChunk, ChunkyVersion -# # from relic.sga import Archive, writer, File, Folder -# from write_chunky_samples import build_sample_chunky_v1_1 -# -# -# def assert_chunies(left: RelicChunky, right: RelicChunky): -# # ASSERT HEADER -# assert left.header == right.header, "Chunky Version Mismatch" -# v = left.header.version -# assert len(left.chunks) == len(right.chunks), "Chunk Count Mismatch" -# for left, right in zip(left.walk_chunks(True, False), right.walk_chunks(True, False)): -# l_path, l_folders, l_datas = left -# r_path, r_folders, r_datas = right -# -# assert l_path == r_path, "Chunk Path Mismatch" -# assert len(l_folders) == len(r_folders), "Chunk Folder Count Mismatch" -# for l_folder, r_folder in zip(l_folders, r_folders): -# l_folder: FolderChunk -# r_folder: FolderChunk -# # WE do it manualy since we don't expect size to be correct for manually built chunkies -# assert l_folder.header.version == r_folder.header.version, "Chunk Folder Header ('Version') Mismatch" -# assert l_folder.header.name == 
r_folder.header.name, "Chunk Folder Header ('Name') Mismatch" -# assert l_folder.header.type == r_folder.header.type, "Chunk Folder Header ('Type') Mismatch" -# assert l_folder.header.id == r_folder.header.id, "Chunk Folder Header ('Id') Mismatch" -# if v == ChunkyVersion.v3_1: -# assert l_folder.header.unk_v3_1 == r_folder.header.unk_v3_1, "Chunk Folder Header ('Unks v3.1') Mismatch" -# -# assert len(l_datas) == len(r_datas), "Chunk Data Count Mismatch" -# for l_data, r_data in zip(l_datas, r_datas): -# l_data: DataChunk -# r_data: DataChunk -# assert l_data.header.equal(r_data.header, v), "Chunk Data Header Mismatch" -# assert len(l_data.data) == len(r_data.data), "Chunk Data Size Mismatch" -# for i in range(len(l_data.data)): -# assert l_data.data[i] == r_data.data[i], f"Chunk Data Mismatch @{i}" -# -# # for path, folders, datas in left.walk_chunks() -# -# # assert len(r_lookup) == len(l_lookup), "Drive Count" -# # for key in l_lookup: -# # assert key in r_lookup, "Drive Not Found" -# # -# # for path, folders, files in left.walk(True): -# # for l_folder in folders: -# # r_folder: Folder = right.get_from_path(path, l_folder.name) -# # assert r_folder is not None, "Folder Not Found" -# # assert r_folder.name == l_folder.name, "Foldern Name" -# # assert r_folder.folder_count() == l_folder.folder_count(), "Folder Subfolder Count" -# # assert r_folder.file_count() == l_folder.file_count(), "Folder File Count" -# # for l_file in files: -# # r_data: File = right.get_from_path(path, l_file.name) -# # assert r_data is not None, "File Not Found" -# # assert r_data.name == l_file.name, "File Name" -# # assert len(r_data.data) == len(l_file.data), f"File Data Length" -# # for i in range(len(r_data.data)): -# # assert r_data.data[i] == l_file.data[i], f"File Data Mismatch @{i}" -# # assert r_data.header.decompressed_size == l_file.header.decompressed_size, "File Decompressed Size" -# # assert r_data.header.compressed_size == l_file.header.compressed_size, "File Compressed Size" -# # # TODO assert file flags -# -# -# def run_test(chunky: RelicChunky): -# with BytesIO() as buffer: -# chunky._pack(buffer) -# buffer.seek(0) -# generated = RelicChunky._unpack(buffer) -# assert_chunies(chunky, generated) -# -# -# def test_chunky_v1_1(): -# archive = build_sample_chunky_v1_1() -# run_test(archive) diff --git a/tests/relic_chunky/write_chunky_samples.py b/tests/relic_chunky/write_chunky_samples.py deleted file mode 100644 index 9dec7b6..0000000 --- a/tests/relic_chunky/write_chunky_samples.py +++ /dev/null @@ -1,24 +0,0 @@ -# import zlib -# -# from relic.chunky import RelicChunky, DataChunk, ChunkHeader, ChunkType, FolderChunk, RelicChunkyHeader, ChunkyVersion -# from tests.helpers import lorem_ipsum -# -# -# def build_sample_chunky_v1_1() -> RelicChunky: -# EXD = "EXD " -# EXDC = "EXDC" -# EXDF = "EXDF" -# -# lorem_ipsum_data = lorem_ipsum.encode("ascii") -# lorem_ipsum_compressed = zlib.compress(lorem_ipsum_data) -# -# uncomp_header = ChunkHeader(ChunkType.Data, EXD, 1, len(lorem_ipsum_data), "Lorem Ipsum") -# lorem_ipsum_uncomp = DataChunk(uncomp_header, lorem_ipsum_data) -# comp_header = ChunkHeader(ChunkType.Data, EXDC, 1, len(lorem_ipsum_compressed), "Lorem Ipsum Compressed") -# lorem_ipsum_comp = DataChunk(comp_header, lorem_ipsum_compressed) -# folder_header = ChunkHeader(ChunkType.Folder, EXDF, 1, 0, "Lorem Ipsum Test Data") # size wil be fixed when writing, and is ignored in assetions -# folder = FolderChunk([lorem_ipsum_uncomp,lorem_ipsum_comp],folder_header) -# -# chunky_header = 
RelicChunkyHeader.default(version=ChunkyVersion.v1_1.value) -# chunky = RelicChunky([folder],chunky_header) -# return chunky From 73a586ef4a75b6cca879adef681213977518abcb Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sun, 12 Jun 2022 01:55:38 -0800 Subject: [PATCH 14/19] Update .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 55dd7a5..8be0d69 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ dist/* # Ignore debug-dev dumped Binary files (Files saved to cwd for fast examination) src/scripts/*.bin ignore/texconv.exe -tests/relic/sga/archive/file_sources.json +tests/relic/sga/sources.json From 32451b3c48161f95fdb9999423f4b5ee2640119b Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sun, 12 Jun 2022 01:55:44 -0800 Subject: [PATCH 15/19] Mypy refactoring --- src/relic/sga/_serializers.py | 14 ++++++++------ src/relic/sga/protocols.py | 3 +-- src/relic/sga/v2/_serializers.py | 12 ++++++------ src/relic/sga/v5/_serializers.py | 14 ++++++-------- src/relic/sga/v7/_serializers.py | 15 ++++++--------- src/relic/sga/v9/_serializers.py | 15 ++++++--------- 6 files changed, 33 insertions(+), 40 deletions(-) diff --git a/src/relic/sga/_serializers.py b/src/relic/sga/_serializers.py index 6e52d04..df0c20d 100644 --- a/src/relic/sga/_serializers.py +++ b/src/relic/sga/_serializers.py @@ -36,11 +36,11 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO) -> DriveDef: - alias: bytes - name: bytes - alias, name, folder_start, folder_end, file_start, file_end, root_folder = self.layout.unpack_stream(stream) - alias: str = alias.rstrip(b"\0").decode("ascii") - name: str = name.rstrip(b"\0").decode("ascii") + encoded_alias: bytes + encoded_name: bytes + encoded_alias, encoded_name, folder_start, folder_end, file_start, file_end, root_folder = self.layout.unpack_stream(stream) + alias: str = encoded_alias.rstrip(b"\0").decode("ascii") + name: str = encoded_name.rstrip(b"\0").decode("ascii") folder_range = (folder_start, folder_end) file_range = (file_start, file_end) return DriveDef(alias=alias, name=name, root_folder=root_folder, folder_range=folder_range, file_range=file_range) @@ -178,10 +178,12 @@ def _chunked_read(stream: BinaryIO, size: Optional[int] = None, chunk_size: Opti break elif size is not None and chunk_size is None: yield stream.read(size) - else: + elif size is not None and chunk_size is not None: # MyPy chunks = size // chunk_size for _ in range(chunks): yield stream.read(chunk_size) total_read = chunk_size * chunks if total_read < size: yield stream.read(size - total_read) + else: + raise Exception("Something impossible happened!") diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py index 2910814..b9ccec3 100644 --- a/src/relic/sga/protocols.py +++ b/src/relic/sga/protocols.py @@ -5,8 +5,7 @@ from types import ModuleType from typing import TypeVar, Protocol, List, Optional, ForwardRef, Tuple, Iterable, BinaryIO, Type, runtime_checkable -from relic.common import Version -from relic.sga._core import StorageType +from relic.sga._core import StorageType, Version FileFwd = ForwardRef("File") FolderFwd = ForwardRef("Folder") diff --git a/src/relic/sga/v2/_serializers.py b/src/relic/sga/v2/_serializers.py index bc9926a..0bbc966 100644 --- a/src/relic/sga/v2/_serializers.py +++ b/src/relic/sga/v2/_serializers.py @@ -32,9 +32,9 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO) -> FileDef: - storage_type: int - 
name_pos, storage_type, data_pos, length_on_disk, length_in_archive = self.layout.unpack_stream(stream) - storage_type: StorageType = self.INT2STORAGE[storage_type] + storage_type_val: int + name_pos, storage_type_val, data_pos, length_on_disk, length_in_archive = self.layout.unpack_stream(stream) + storage_type: StorageType = self.INT2STORAGE[storage_type_val] return FileDef(name_pos, data_pos, length_on_disk, length_in_archive, storage_type) def pack(self, stream: BinaryIO, value: FileDef) -> int: @@ -58,8 +58,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> if version != self.version: raise VersionMismatchError(version,self.version) - name: bytes - file_md5, name, header_md5, header_size, data_pos = self.layout.unpack_stream(stream) + encoded_name: bytes + file_md5, encoded_name, header_md5, header_size, data_pos = self.layout.unpack_stream(stream) header_pos = stream.tell() # Seek to header; but we skip that because we are already there toc_header = self.TocHeader.unpack(stream) @@ -76,7 +76,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> file.data = lazy_info.read(decompress) file._lazy_info = None - name: str = name.rstrip(b"").decode("utf-16-le") + name: str = encoded_name.rstrip(b"\0").decode("utf-16-le") file_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN) header_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, header_size, eigen=self.FILE_MD5_EIGEN) metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper) diff --git a/src/relic/sga/v5/_serializers.py b/src/relic/sga/v5/_serializers.py index 5825f05..a0f4680 100644 --- a/src/relic/sga/v5/_serializers.py +++ b/src/relic/sga/v5/_serializers.py @@ -26,14 +26,12 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO): - storage_type: int - verification_type: int - name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type, storage_type = self.layout.unpack_stream(stream) + name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val = self.layout.unpack_stream(stream) modified = datetime.fromtimestamp(modified_seconds, timezone.utc) - storage_type: StorageType = StorageType(storage_type) - verification_type:VerificationType = VerificationType(verification_type) + storage_type: StorageType = StorageType(storage_type_val) + verification_type: VerificationType = VerificationType(verification_type_val) return core.FileDef(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type) @@ -60,8 +58,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> if version != self.version: raise VersionMismatchError(version,self.version) - name: bytes - file_md5, name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = self.layout.unpack_stream(stream) + encoded_name: bytes + file_md5, encoded_name, header_md5, header_size, data_pos, header_pos, RSV_1, RSV_0, unk_a = self.layout.unpack_stream(stream) if (RSV_1, RSV_0) != (1, 0): raise MismatchError("Reserved Field", (RSV_1, RSV_0), (1, 0)) # header_pos = stream.tell() @@ -80,7 +78,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> file.data = lazy_info.read(decompress) file._lazy_info = None - name: str = name.rstrip(b"").decode("utf-16-le") + name: str = encoded_name.rstrip(b"\0").decode("utf-16-le") file_md5_helper =
core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN) header_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, header_size, eigen=self.FILE_MD5_EIGEN) metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper, unk_a) diff --git a/src/relic/sga/v7/_serializers.py b/src/relic/sga/v7/_serializers.py index 295608e..0d9074a 100644 --- a/src/relic/sga/v7/_serializers.py +++ b/src/relic/sga/v7/_serializers.py @@ -26,14 +26,11 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO): - storage_type: int - verification_type: int - - name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type, storage_type, crc, hash_pos = self.layout.unpack_stream(stream) + name_rel_pos, data_rel_pos, length, store_length, modified_seconds, verification_type_val, storage_type_val, crc, hash_pos = self.layout.unpack_stream(stream) modified = datetime.fromtimestamp(modified_seconds, timezone.utc) - storage_type: StorageType = StorageType(storage_type) - verification_type: VerificationType = VerificationType(verification_type) + storage_type: StorageType = StorageType(storage_type_val) + verification_type: VerificationType = VerificationType(verification_type_val) return core.FileDef(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type, crc,hash_pos) @@ -58,8 +55,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> raise VersionMismatchError(version,self.version) - name: bytes - name, header_size, data_pos, RSV_1 = self.layout.unpack_stream(stream) + encoded_name: bytes + encoded_name, header_size, data_pos, RSV_1 = self.layout.unpack_stream(stream) if RSV_1 != 1: raise MismatchError("Reserved Field", RSV_1, 1) header_pos = stream.tell() @@ -79,7 +76,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> file.data = lazy_info.read(decompress) file._lazy_info = None - name: str = name.rstrip(b"").decode("utf-16-le") + name: str = encoded_name.rstrip(b"").decode("utf-16-le") metadata = core.ArchiveMetadata(unk_a, block_size) return Archive(name, metadata, drives) diff --git a/src/relic/sga/v9/_serializers.py b/src/relic/sga/v9/_serializers.py index 1bfe5b0..d7cef8f 100644 --- a/src/relic/sga/v9/_serializers.py +++ b/src/relic/sga/v9/_serializers.py @@ -26,14 +26,11 @@ def __init__(self, layout: Struct): self.layout = layout def unpack(self, stream: BinaryIO): - storage_type: int - verification_type: int - - name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds,verification_type, storage_type, crc = self.layout.unpack_stream(stream) + name_rel_pos, hash_pos, data_rel_pos, length, store_length, modified_seconds,verification_type_val, storage_type_val, crc = self.layout.unpack_stream(stream) modified = datetime.fromtimestamp(modified_seconds, timezone.utc) - storage_type: StorageType = StorageType(storage_type) - verification_type: VerificationType = VerificationType(verification_type) + storage_type: StorageType = StorageType(storage_type_val) + verification_type: VerificationType = VerificationType(verification_type_val) return core.FileDef(name_rel_pos, data_rel_pos, length, store_length, storage_type, modified, verification_type,crc, hash_pos) @@ -58,8 +55,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> raise VersionMismatchError(version,self.version) - name: bytes - name, header_pos, header_size, data_pos, data_pos, RSV_1, sha_256 = 
self.layout.unpack_stream(stream) + encoded_name: bytes + encoded_name, header_pos, header_size, data_pos, data_pos, RSV_1, sha_256 = self.layout.unpack_stream(stream) if RSV_1 != 1: raise MismatchError("Reserved Field", RSV_1, 1) # header_pos = stream.tell() @@ -79,7 +76,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) -> file.data = lazy_info.read(decompress) file._lazy_info = None - name: str = name.rstrip(b"").decode("utf-16-le") + name: str = encoded_name.rstrip(b"").decode("utf-16-le") metadata = core.ArchiveMetadata(sha_256, unk_a, unk_b, block_size) return Archive(name, metadata, drives) From f560f79849c16f226abaea967fa1cb3413d7c3ee Mon Sep 17 00:00:00 2001 From: Marcus Kertesz Date: Sun, 12 Jun 2022 11:53:12 -0800 Subject: [PATCH 16/19] Make relic a namespace package --- mypy.ini | 2 + src/relic/__init__.py | 0 src/relic/chunky/chunk/header.py | 2 - src/relic/chunky/chunky/header.py | 8 +-- src/relic/common.py | 88 ----------------------- src/relic/config.py | 115 ------------------------------ src/relic/ucs.py | 36 +--------- src/scripts/dump_sga.py | 109 ---------------------------- 8 files changed, 6 insertions(+), 354 deletions(-) create mode 100644 mypy.ini delete mode 100644 src/relic/__init__.py delete mode 100644 src/relic/common.py delete mode 100644 src/relic/config.py delete mode 100644 src/scripts/dump_sga.py diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..976ba02 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,2 @@ +[mypy] +ignore_missing_imports = True diff --git a/src/relic/__init__.py b/src/relic/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/relic/chunky/chunk/header.py b/src/relic/chunky/chunk/header.py index b513ba8..aa7f80d 100644 --- a/src/relic/chunky/chunk/header.py +++ b/src/relic/chunky/chunk/header.py @@ -8,8 +8,6 @@ from serialization_tools.vstruct import VStruct from ..chunky.header import ChunkyVersion -from relic.common import VersionLike -from ...common import VersionError class ChunkType(Enum): diff --git a/src/relic/chunky/chunky/header.py b/src/relic/chunky/chunky/header.py index bbc3255..92fc258 100644 --- a/src/relic/chunky/chunky/header.py +++ b/src/relic/chunky/chunky/header.py @@ -7,19 +7,17 @@ from serialization_tools.magic import MagicWordIO, MagicWord from serialization_tools.structx import Struct -from relic.common import VersionError -# from relic.sga.common import VersionEnum, Version, VersionLike VersionEnum = Version = VersionLike = None ChunkyVersionLayout = Struct("< 2L") class ChunkyVersion(Enum): Unsupported = None - v0101 = None # Version(1, 1) + v0101 = None # Version(1, 1) Dow = v0101 # ALIAS for Prettiness - v0301 = None # Version(3, 1) + v0301 = None # Version(3, 1) Dow2 = v0301 # ALIAS for Prettiness - v4010 = None #Version(4, 1) + v4010 = None # Version(4, 1) @classmethod def unpack_version(cls, stream: BinaryIO) -> Version: diff --git a/src/relic/common.py b/src/relic/common.py deleted file mode 100644 index 3c2a745..0000000 --- a/src/relic/common.py +++ /dev/null @@ -1,88 +0,0 @@ -from dataclasses import dataclass -from enum import Enum -from typing import Union, Optional, Type, List - -from serialization_tools.structx import Struct - - -class ListableEnum(Enum): - @classmethod - def list(cls): - return list(map(lambda c: c.value, cls)) - - @staticmethod - def get_list(cls: Type[Enum]): - return list(map(lambda c: c.value, cls)) - - -class VersionEnum(ListableEnum): - def __eq__(self, other): - if isinstance(other, VersionEnum): - return 
self.value == other.value - elif isinstance(other, Version): - return self.value == other - else: - super().__eq__(other) - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return self.value.__hash__() - - -@dataclass -class Version: - major: int - minor: Optional[int] = 0 - - _32 = Struct("< H H") - _64 = Struct("< L L") - - def __str__(self) -> str: - return f"Version {self.major}.{self.minor}" - - def __eq__(self, other): - if other is None: - return False - elif isinstance(other, VersionEnum): - return self.major == other.value.major and self.minor == other.value.minor - elif isinstance(other, Version): - return self.major == other.major and self.minor == other.minor - else: - return super().__eq__(other) - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - # Realistically; Version will always be <256 - # But we could manually set it to something much bigger by accident; and that may cause collisions - return self.major << 32 + self.minor - - -VersionLike = Union[Version, VersionEnum] - - -class VersionError(Exception): - def __init__(self, version: VersionLike = None, supported: Union[List[Version], Version, Type[VersionEnum], VersionEnum] = None, *args): - super().__init__(*args) - self.version = version - if supported: - if issubclass(supported, VersionEnum): - supported = ListableEnum.get_list(supported) - elif not isinstance(supported, list): - supported = [supported] - self.supported = supported - - def __str__(self): - msg = "Unexpected version" - if self.version or self.supported: - msg += ";" - if self.version: - msg += f" got {repr(self.version)}" - if self.version and self.supported: - msg += "," - if self.supported: - msg += f" expected {repr(self.supported)}" - return msg + "!" 
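A side note on the common.py deleted above: its Version.__hash__ returned self.major << 32 + self.minor, and because "+" binds tighter than "<<" in Python, that parses as major << (32 + minor) rather than the presumably intended (major << 32) + minor, so the collision worry in its own comment was well founded. A standalone check with hypothetical values:

# '+' binds tighter than '<<': the old expression shifted by (32 + minor).
major, minor = 1, 2
assert major << 32 + minor == major << (32 + minor)  # what the deleted code computed
assert (major << 32) + minor == 0x100000002          # the presumably intended packing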
diff --git a/src/relic/config.py b/src/relic/config.py
deleted file mode 100644
index c956374..0000000
--- a/src/relic/config.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from enum import Enum
-from os import PathLike
-from os.path import join, exists, abspath
-from pathlib import Path, PurePath
-from typing import Optional, Iterable, Tuple, Set
-
-import serialization_tools.common_directories
-
-dll_folder = abspath(join(__file__, "..\\..\\..\\Required EXEs"))
-aifc_decoder_path = join(dll_folder, "dec.exe")
-aifc_encoder_path = join(dll_folder, "enc.exe")
-texconv_path = join(dll_folder, "texconv.exe")
-
-
-def get_path_to_steam_library(steam_directory: PathLike = None) -> Path:
-    steam_directory = (PurePath(steam_directory) if steam_directory else steam_directory) or archive_tools.common_directories.get_steam_install_dir()
-    return steam_directory / "steamapps" / "common"
-
-
-class DowIIIGame(Enum):
-    BaseGame = 0
-
-
-class DowIIGame(Enum):
-    Retribution = 2
-    ChaosRising = 1
-    BaseGame = 0
-
-
-class DowGame(Enum):
-    SoulStorm = 4
-    DarkCrusade = 3
-    WinterAssault = 2
-    Gold = 1
-    BaseGame = 0
-
-
-dow_game_paths = {
-    DowIIIGame.BaseGame: "Dawn of War III",
-
-    DowIIGame.Retribution: "Dawn of War II - Retribution",
-
-    DowGame.SoulStorm: "Dawn of War Soulstorm",
-    DowGame.DarkCrusade: "Dawn of War Dark Crusade",
-    DowGame.WinterAssault: "Dawn of War Winter Assault",
-    DowGame.Gold: "Dawn of War Gold",
-    # DowGame.BaseGame:"Dawn of War",  # The original dawn of war probably doesn't include 'Gold', IDK what it is specifically but this would be my first guess
-}
-
-
-def get_dow_root_directories() -> Iterable[Tuple[DowGame, Path]]:
-    steam_path = get_path_to_steam_library()
-    for game, partial_path in dow_game_paths.items():
-        path = steam_path / partial_path
-        if exists(path):
-            yield game, path
-
-
-def filter_unique_dow_game(dow_root_directories: Iterable[Tuple[DowGame, Path]]) -> Iterable[Tuple[DowGame, Path]]:
-    unique: Set[DowGame] = set()
-    for game, path in dow_root_directories:
-        if game in unique:
-            continue
-        yield game, path
-        unique.add(game)
-
-
-# Allows us to get the most up-to-date dump of all assets:
-# Gold (I believe) only contains Space Marines, Orks, Chaos, & Eldar
-# Winter Assault Adds Imperial Guard
-# Dark Crusade Adds Tau & Necrons
-# SoulStorm Adds Dark Eldar & Sisters Of Battle
-# If we only want to dump ONE game; we'd want to dump the latest to get all the assets from the previous one
-# Except for campaign assets; which are unique to each install
-# For Campaign assets, use get_unique and dump each to a separate directory (or order the dumps such that later games come after earlier games)
-def filter_latest_dow_game(dow_root_directories: Iterable[Tuple[DowGame, Path]], series: Enum = DowGame) -> Optional[Tuple[DowGame, Path]]:
-    latest = latest_path = None
-    for game, path in dow_root_directories:
-        if not isinstance(game, series):
-            continue
-        if latest and latest.value > game.value:
-            continue
-        latest = game
-        latest_path = path
-    if latest:
-        return latest, latest_path
-    return None
-
-
-def get_latest_dow_game() -> Optional[Tuple[DowGame, Path]]:
-    return filter_latest_dow_game(get_dow_root_directories(), series=DowGame)
-
-
-def get_latest_dow2_game() -> Optional[Tuple[DowGame, Path]]:
-    return filter_latest_dow_game(get_dow_root_directories(), series=DowIIGame)
-
-
-def get_latest_dow3_game() -> Optional[Tuple[DowGame, Path]]:
-    return filter_latest_dow_game(get_dow_root_directories(), series=DowIIIGame)
-
-
-def get_unique_dow_game() -> Iterable[Tuple[DowGame, Path]]:
-    return filter_unique_dow_game(get_dow_root_directories())
-
-
-if __name__ == "__main__":
-    print("\nAll Dirs")
-    for game, path in get_dow_root_directories():
-        print(game.name, ":\t", path)
-
-    print("\nLatest")
-    dirs = get_dow_root_directories()
-    latest = filter_latest_dow_game(dirs)
-    print(latest)
diff --git a/src/relic/ucs.py b/src/relic/ucs.py
index db40f92..c71acf1 100644
--- a/src/relic/ucs.py
+++ b/src/relic/ucs.py
@@ -1,19 +1,15 @@
 from __future__ import annotations

-import json
 import re
-from os import PathLike, walk
 from collections import UserDict
+from os import PathLike, walk
 from os.path import join, splitext, split
-from pathlib import Path
 from typing import TextIO, Optional, Iterable, Union, Mapping

 # UCS probably stands for UnicodeString
 # I personally think that's a horribly misleading name for this file
 from serialization_tools.walkutil import filter_by_file_extension, collapse_walk_on_files, filter_by_path

-from relic.config import DowIIIGame, DowGame, DowIIGame, filter_latest_dow_game, get_dow_root_directories
-

 class UcsDict(UserDict):
     def write_stream(self, stream: TextIO, ordered: bool = False) -> int:
@@ -175,33 +171,3 @@ def get_lang_string_for_file(environment: Union[LangEnvironment, LangFile], file
     replacement = _file_safe_string(replacement)

     return join(dir_path, replacement + f" ~ Clip {num}" + ext)
-
-
-if __name__ == "__main__":
-    # A compromise between an automatic location and NOT the local directory
-    # PyCharm will hang trying to reload the files (just to update the hierarchy, not update references)
-    # To avoid that, we DO NOT use a local directory, but an external directory
-    # TODO add a persistent_data path to archive tools
-    Root = Path(r"~\Appdata\Local\ModernMAK\ArchiveTools\Relic-SGA").expanduser()
-    dump_type = "UCS_DUMP"
-    path_lookup = {
-        DowIIIGame: Root / r"DOW_III",
-        DowIIGame: Root / r"DOW_II",
-        DowGame: Root / r"DOW_I"
-    }
-    series = DowGame
-    out_path = path_lookup[series] / dump_type
-    r = filter_latest_dow_game(get_dow_root_directories(), series=series)
-    if r:
-        game, in_path = r
-    else:
-        raise FileNotFoundError("Couldn't find any suitable DOW games!")
-
-    print("Loading Locale Environment...")
-    lang_env = LangEnvironment.load_environment(in_path)
-    print(f"\tReading from '{in_path}'")
-    out_path = out_path.with_suffix(".json")
-    with open(out_path, "w") as handle:
-        lang_env_sorted = dict(sorted(lang_env.items()))
-        json.dump(lang_env_sorted, handle, indent=4)
-    print(f"\tSaved to '{out_path}'")
diff --git a/src/scripts/dump_sga.py b/src/scripts/dump_sga.py
deleted file mode 100644
index 8dd0638..0000000
--- a/src/scripts/dump_sga.py
+++ /dev/null
@@ -1,109 +0,0 @@
-import os
-from os.path import splitext, dirname, basename
-from pathlib import Path
-from typing import Iterable
-
-from serialization_tools.walkutil import BlackList, WhiteList, filter_by_path, filter_by_file_extension, collapse_walk_on_files
-
-import relic.sga_old.common
-import relic.sga_old.io
-from relic.config import DowIIIGame, DowIIGame, DowGame, filter_latest_dow_game, get_dow_root_directories
-
-from relic.sga_old.common.archive import ArchiveMagicWord, Archive
-
-
-def __safe_makedirs(path: str, use_dirname: bool = True):
-    if use_dirname:
-        path = dirname(path)
-    try:
-        os.makedirs(path)
-    except FileExistsError:
-        pass
-
-
-# walk all archives in the given directory, custom whitelist, blacklist, and extensions will overwrite defaults
-# Defaults: .sga, No *-Med, *-Low archives
-def walk_archive_paths(folder: os.PathLike, extensions: WhiteList = None, whitelist: WhiteList = None, blacklist: BlackList = None) -> Iterable[str]:
-    # Default EXT and Blacklist
-    extensions = extensions or "sga"  # Default to sga, it shouldn't ever be different, so I could probably
-    blacklist = blacklist or ["-Low", "-Med"]  # Typically, beside -High files, we only want the biggest
-    # Flattened long call to make it easy to read
-    walk = os.walk(folder)
-    walk = filter_by_path(walk, whitelist=whitelist, blacklist=blacklist, prune=True)
-    walk = filter_by_file_extension(walk, whitelist=extensions)
-    walk = relic.sga.common.walk(walk)
-    return collapse_walk_on_files(walk)
-
-
-def dump_archive(input_folder: os.PathLike, output_folder: os.PathLike, overwrite: bool = False, update: bool = False):
-    if overwrite and update:
-        raise NotImplementedError("Both write options selected, would you like to overwrite files? Or only update non-matching files?")
-
-    output_folder_path = Path(output_folder)
-    for input_file_path in walk_archive_paths(input_folder):
-        with open(input_file_path, "rb") as in_handle:
-            archive = relic.sga.io.unpack_archive(in_handle)
-            archive_name = splitext(basename(input_file_path))[0]
-            with archive.header.data_ptr.stream_jump_to(in_handle) as data_stream:
-                print(f"\tDumping '{archive_name}'")
-                for _, _, _, files in relic.sga.common.walk():
-                    for file in files:
-                        relative_file_path = file.full_path
-
-                        if ':' in relative_file_path.parts[0]:
-                            relative_file_path = str(relative_file_path).replace(":", "")
-
-                        output_file_path = output_folder_path / archive_name / relative_file_path
-
-                        msg = f"Writing '{relative_file_path}'"
-                        skip = False
-                        if output_file_path.exists():
-                            if update:
-                                if output_file_path.stat().st_size == file.header.decompressed_size:
-                                    msg = f"Skipping (Up to date - Decompressed Size Match)"
-                                    skip = True
-                                elif output_file_path.stat().st_size == file.header.compressed_size:
-                                    msg = f"Skipping (Up to date - Compressed Size Match)"
-                                    skip = True
-                                else:
-                                    msg = f"Updating"
-                            elif not overwrite:
-                                msg = f"Skipping (Exists)"
-                                skip = True
-
-                        print(f"\t\t{msg} '{relative_file_path}'")
-                        if skip:
-                            continue
-                        __safe_makedirs(str(output_file_path))
-                        with open(output_file_path, "wb") as out_handle:
-                            data = file.read_data(data_stream, True)
-                            out_handle.write(data)
-                        print(f"\t\t\tWrote to '{output_file_path}'")
-
-    # write_binary(walk, output_folder, decompress, write_ext)
-
-
-if __name__ == "__main__":
-    # A compromise between an automatic location and NOT the local directory
-    # PyCharm will hang trying to reload the files (just to update the hierarchy, not update references)
-    # To avoid that, we DO NOT use a local directory, but an external directory
-    # TODO add a persistent_data path to archive tools
-    Root = Path(r"~\Appdata\Local\ModernMAK\ArchiveTools\Relic-SGA").expanduser()
-    dump_type = "SGA_DUMP"
-    path_lookup = {
-        DowIIIGame: Root / r"DOW_III",
-        DowIIGame: Root / r"DOW_II",
-        DowGame: Root / r"DOW_I"
-    }
-    series = DowGame
-    out_path = path_lookup[series] / dump_type
-    r = filter_latest_dow_game(get_dow_root_directories(), series=series)
-    if r:
-        game, in_path = r
-    else:
-        raise FileNotFoundError("Couldn't find any suitable DOW games!")
-    print(f"Dumping game '{game}' from '{in_path}' to '{out_path}'\n")
-    dump_archive(in_path, out_path, update=True)
-    print(f"\nDumped game '{game}' from '{in_path}' to '{out_path}'")
-    # dump_all_sga(root, blacklist=[r"-Low", "-Med"],
-    #              out_dir=r"D:/Dumps/DOW I/sga", verbose=True)

From 85a8f67259ff846b92be64795960e747b2876fba Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Sun, 12 Jun 2022 11:55:32 -0800
Subject: [PATCH 17/19] more refactoring / setting up testing

---
 src/relic/sga/__init__.py                     |  6 ++-
 src/relic/sga/_abc.py                         | 37 +++++++++++++-----
 src/relic/sga/_core.py                        |  3 +-
 src/relic/sga/_serializers.py                 | 35 +++++++++++++++--
 src/relic/sga/protocols.py                    | 12 +++++-
 src/relic/sga/py.typed                        |  0
 src/relic/sga/v2/_serializers.py              |  7 ++--
 src/relic/sga/v2/core.py                      | 15 +------
 src/relic/sga/v5/_serializers.py              |  7 ++--
 src/relic/sga/v5/core.py                      | 17 +-------
 src/relic/sga/v7/_serializers.py              |  2 +-
 src/relic/sga/v9/_serializers.py              |  2 +-
 src/relic/sga/v9/core.py                      |  3 +-
 tests/relic/sga/datagen/v2.py                 | 36 +++++++++++++++++
 tests/relic/sga/test_apis.py                  | 26 ++++++++----
 .../relic/sga/test_data/v2/DowI Test Data.sga | Bin
 16 files changed, 144 insertions(+), 64 deletions(-)
 create mode 100644 src/relic/sga/py.typed
 create mode 100644 tests/relic/sga/datagen/v2.py
 rename test_data/sga/archive-v2_0.sga => tests/relic/sga/test_data/v2/DowI Test Data.sga (100%)

diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py
index 95bd115..f9f73c6 100644
--- a/src/relic/sga/__init__.py
+++ b/src/relic/sga/__init__.py
@@ -1,10 +1,12 @@
 from __future__ import annotations

 from relic.sga._apis import apis as APIs
-from relic.sga._core import Version, MagicWord
+from relic.sga._core import Version, MagicWord, StorageType, VerificationType

 __all__ = [
     "APIs",
     "Version",
-    "MagicWord"
+    "MagicWord",
+    "StorageType",
+    "VerificationType"
 ]
diff --git a/src/relic/sga/_abc.py b/src/relic/sga/_abc.py
index 5c1f1d1..51dcb32 100644
--- a/src/relic/sga/_abc.py
+++ b/src/relic/sga/_abc.py
@@ -9,7 +9,7 @@ from typing import List, Optional, Tuple, BinaryIO, Type, Generic

 from relic.sga import protocols as p
-from relic.sga.protocols import TFileMetadata, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer
+from relic.sga.protocols import TFileMetadata, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer, IOContainer
 from relic.sga._core import StorageType
 from relic.sga.errors import Version
@@ -27,8 +27,10 @@ class _FileLazyInfo:
     packed_size: int
     unpacked_size: int
     stream: BinaryIO
+    decompress: bool

-    def read(self, decompress: bool) -> bytes:
+    def read(self, decompress: Optional[bool] = None) -> bytes:
+        decompress = self.decompress if decompress is None else decompress
         jump_back = self.stream.tell()
         self.stream.seek(self.jump_to)
         buffer = self.stream.read(self.packed_size)
@@ -67,16 +69,31 @@ class FileDefABC:

 @dataclass
 class File(p.File[TFileMetadata]):
     name: str
-    data: Optional[bytes]
+    _data: Optional[bytes]
     storage_type: StorageType
-    metadata: Optional[TFileMetadata] = None
-    parent: Optional[IONode] = None
+    _is_compressed: bool
+    metadata: TFileMetadata
+    parent: Optional[IOContainer] = None
     _lazy_info: Optional[_FileLazyInfo] = None
-    _is_compressed: bool = None
+
+    @property
+    def data(self) -> bytes:
+        if self._data is None:
+            if self._lazy_info is None:
+                raise TypeError("Data was not loaded!")
+            else:
+                self._data = self._lazy_info.read()
+                self._lazy_info = None
+        return self._data
+
+    @data.setter
+    def data(self, value: bytes) -> None:
+        self._data = value

     @contextmanager
-    def open(self, read_only: bool = True) -> BinaryIO:
-        with BytesIO(self.data) as stream:
+    def open(self, read_only: bool = True):
+        data = self.data
+        with BytesIO(data) as stream:
             yield stream
             if not read_only:
                 stream.seek(0)
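The File changes above replace the plain `data` field with a cached, lazily populated property: `_data` is the cache, `_lazy_info` knows how to fetch the bytes, and a successful fetch consumes `_lazy_info`. Distilled into a self-contained sketch (hypothetical LazyBlob/LazyFile stand-ins, not the _abc classes):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class LazyBlob:  # stand-in for _FileLazyInfo
        payload: bytes

        def read(self) -> bytes:
            return self.payload

    class LazyFile:  # stand-in for _abc.File
        def __init__(self, lazy: Optional[LazyBlob]):
            self._data: Optional[bytes] = None
            self._lazy_info = lazy

        @property
        def data(self) -> bytes:
            if self._data is None:
                if self._lazy_info is None:
                    raise TypeError("Data was not loaded!")
                self._data = self._lazy_info.read()
                self._lazy_info = None  # the backing source is consumed exactly once
            return self._data

    f = LazyFile(LazyBlob(b"payload"))
    assert f.data == b"payload"   # first access triggers the lazy read
    assert f._lazy_info is None   # later reads hit the cache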
@@ -87,6 +104,8 @@ def is_compressed(self) -> bool:
         return self._is_compressed

     def compress(self) -> None:
+        if self.data is None:
+            raise TypeError("Data was not loaded!")
         if not self._is_compressed:
             self.data = zlib.compress(self.data)
             self._is_compressed = True
@@ -166,7 +185,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
         return self._serializer.read(stream, lazy, decompress)

     def write(self, stream: BinaryIO, archive: TArchive) -> int:
-        return self._serializer.write(stream,archive)
+        return self._serializer.write(stream, archive)


 class APISerializer(Generic[TArchive]):
diff --git a/src/relic/sga/_core.py b/src/relic/sga/_core.py
index 4e78667..437720d 100644
--- a/src/relic/sga/_core.py
+++ b/src/relic/sga/_core.py
@@ -2,12 +2,12 @@

 from dataclasses import dataclass
 from enum import Enum
+
 from serialization_tools.structx import Struct

 from typing import Optional, ClassVar, BinaryIO

 from serialization_tools.magic import MagicWordIO

-
 MagicWord = MagicWordIO(Struct("< 8s"), "_ARCHIVE".encode("ascii"))
@@ -46,7 +46,6 @@ def pack(self, stream: BinaryIO):
         return layout.pack_stream(stream, *args)


-
 class StorageType(int, Enum):
     Store = 0
     BufferCompress = 1
diff --git a/src/relic/sga/_serializers.py b/src/relic/sga/_serializers.py
index df0c20d..774dc5a 100644
--- a/src/relic/sga/_serializers.py
+++ b/src/relic/sga/_serializers.py
@@ -1,12 +1,16 @@
 from __future__ import annotations

+import hashlib
 from dataclasses import dataclass
 from typing import BinaryIO, List, Dict, Optional, Callable, Tuple, Iterable

+from serialization_tools.size import KiB
 from serialization_tools.structx import Struct

 from relic.sga import _abc
 from relic.sga._abc import DriveDef, FolderDef, FileDefABC as FileDef, _FileLazyInfo, FileDefABC
+from relic.sga._core import StorageType
+from relic.sga.errors import MD5MismatchError
 from relic.sga.protocols import TFileMetadata, IOContainer, StreamSerializer, T, TFile, TDrive
@@ -67,7 +71,7 @@ def pack(self, stream: BinaryIO, value: FolderDef) -> int:
         return self.layout.pack_stream(stream, *args)


-def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderDef], file_defs: List[FileDef], names: Dict[int, str], data_pos: int, stream: BinaryIO, build_file_meta: Optional[Callable[[FileDef], TFileMetadata]] = None) -> Tuple[List[_abc.Drive], List[_abc.File]]:
+def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderDef], file_defs: List[FileDef], names: Dict[int, str], data_pos: int, stream: BinaryIO, build_file_meta: Optional[Callable[[FileDef], TFileMetadata]] = None, decompress:bool=False) -> Tuple[List[_abc.Drive], List[_abc.File]]:
     all_files: List[TFile] = []
     drives: List[TDrive] = []
     for drive_def in drive_defs:
@@ -78,8 +82,9 @@ def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderD
         for file_def in local_file_defs:
             name = names[file_def.name_pos]
             metadata = build_file_meta(file_def) if build_file_meta is not None else None
-            lazy_info = _FileLazyInfo(data_pos + file_def.data_pos, file_def.length_in_archive, file_def.length_on_disk, stream)
-            file = _abc.File(name, None, file_def.storage_type, metadata, None, lazy_info)
+            lazy_info = _FileLazyInfo(data_pos + file_def.data_pos, file_def.length_in_archive, file_def.length_on_disk, stream, decompress)
+            file_compressed = file_def.storage_type != StorageType.Store
+            file = _abc.File(name=name,_data=None,storage_type=file_def.storage_type,_is_compressed=file_compressed,metadata=metadata, _lazy_info=lazy_info)
             files.append(file)

         folders: List[_abc.Folder] = []
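For reference, compress() above leans on the plain zlib round-trip; assuming compressed entries hold zlib streams, the size bookkeeping works out as:

    import zlib

    raw = b"example payload" * 4
    packed = zlib.compress(raw)          # what a compressed storage entry holds
    assert zlib.decompress(packed) == raw
    # For a compressed entry, unpacked_size/length_on_disk tracks len(raw), while
    # packed_size/length_in_archive tracks len(packed); a Store entry keeps raw bytes.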
@@ -187,3 +192,27 @@ def _chunked_read(stream: BinaryIO, size: Optional[int] = None, chunk_size: Opti
             yield stream.read(size - total_read)
         else:
             raise Exception("Something impossible happened!")
+
+
+@dataclass
+class _Md5ChecksumHelper:
+    expected: bytes
+    stream: BinaryIO
+    start: int
+    size: Optional[int] = None
+    eigen: Optional[bytes] = None
+
+    def read(self,stream:Optional[BinaryIO] = None) -> bytes:
+        stream = self.stream if stream is None else stream
+        stream.seek(self.start)
+        md5 = hashlib.md5(self.eigen)
+        # Safer for large files to read chunked
+        for chunk in _chunked_read(stream,self.size,256*KiB):
+            md5.update(chunk)
+        md5_str = md5.hexdigest()
+        return bytes.fromhex(md5_str)
+
+    def validate(self, stream: Optional[BinaryIO] = None) -> None:
+        result = self.read(stream)
+        if self.expected != result:
+            raise MD5MismatchError(result,self.expected)
diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py
index b9ccec3..8c55657 100644
--- a/src/relic/sga/protocols.py
+++ b/src/relic/sga/protocols.py
@@ -56,7 +56,15 @@ def walk(self) -> IOWalk:

 class File(IOPathable, IONode, Protocol[TFileMetadata]):
     name: str
-    data: bytes
+
+    @property
+    def data(self) -> bytes:
+        raise NotImplementedError
+
+    @data.setter
+    def data(self, value: bytes) -> None:
+        raise NotImplementedError
+
     storage_type: StorageType
     metadata: TFileMetadata
@@ -91,7 +99,7 @@ class Archive(IOWalkable, Protocol[TMetadata]):

 class API(Protocol[TArchive, TDrive, TFolder, TFile]):
-    version:Version
+    version: Version
     Archive: Type[TArchive]
     Drive: Type[TDrive]
     Folder: Type[TFolder]
diff --git a/src/relic/sga/py.typed b/src/relic/sga/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/src/relic/sga/v2/_serializers.py b/src/relic/sga/v2/_serializers.py
index 0bbc966..9423f65 100644
--- a/src/relic/sga/v2/_serializers.py
+++ b/src/relic/sga/v2/_serializers.py
@@ -4,6 +4,7 @@

 from serialization_tools.structx import Struct

+import relic.sga._serializers
 from relic.sga import _abc, _serializers as _s
 from relic.sga._abc import FileDefABC as FileDef, Archive
 from relic.sga.errors import VersionMismatchError
@@ -65,7 +66,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
         toc_header = self.TocHeader.unpack(stream)
         drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
         names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
-        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream,decompress=decompress)

         if not lazy:
             for file in files:
@@ -77,8 +78,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
                     file.data = lazy_info.read(decompress)
                     file._lazy_info = None
         name: str = encoded_name.rstrip(b"\0").decode("utf-16-le")
-        file_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
-        header_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, header_size, eigen=self.FILE_MD5_EIGEN)
+        file_md5_helper = relic.sga._serializers._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
+        header_md5_helper = relic.sga._serializers._Md5ChecksumHelper(header_md5, stream, header_pos, header_size, eigen=self.HEADER_MD5_EIGEN)
         metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper)

         return Archive(name, metadata, drives)
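The checksum scheme _Md5ChecksumHelper (and the v2 serializer above) implements, distilled: seed MD5 with a per-format "eigen" byte string, then digest the archive region starting at `start`, reading in chunks. A sketch with placeholder values (the eigen constant below is invented for illustration, not a real one):

    import hashlib
    from io import BytesIO

    def eigen_md5(stream, start: int, eigen: bytes, size=None, chunk_size=256 * 1024) -> bytes:
        stream.seek(start)
        md5 = hashlib.md5(eigen)  # the eigen seed prefixes the hashed region
        remaining = size
        while True:
            to_read = chunk_size if remaining is None else min(chunk_size, remaining)
            block = stream.read(to_read)
            if not block:
                break
            md5.update(block)
            if remaining is not None:
                remaining -= len(block)
                if remaining <= 0:
                    break
        return md5.digest()

    blob = BytesIO(b"\x00" * 180 + b"toc-and-data")
    digest = eigen_md5(blob, start=180, eigen=b"hypothetical eigen")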
diff --git a/src/relic/sga/v2/core.py b/src/relic/sga/v2/core.py
index eb4f8d8..c0b3e3f 100644
--- a/src/relic/sga/v2/core.py
+++ b/src/relic/sga/v2/core.py
@@ -1,27 +1,14 @@
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import Optional, BinaryIO

 from relic.sga import _abc
+from relic.sga._serializers import _Md5ChecksumHelper
 from relic.sga.errors import Version

 version = Version(2)


-@dataclass
-class _Md5ChecksumHelper:
-    expected: bytes
-    stream: BinaryIO
-    start: int
-    size: Optional[int] = None
-    eigen: Optional[bytes] = None
-
-    def validate(self, stream: BinaryIO = None) -> None:
-        stream = self.stream if stream is None else stream
-        stream.seek(self.start)
-
-
 @dataclass
 class ArchiveMetadata:
     @property
diff --git a/src/relic/sga/v5/_serializers.py b/src/relic/sga/v5/_serializers.py
index a0f4680..199e469 100644
--- a/src/relic/sga/v5/_serializers.py
+++ b/src/relic/sga/v5/_serializers.py
@@ -5,6 +5,7 @@

 from serialization_tools.structx import Struct

+import relic.sga._serializers
 from relic.sga import _abc, _serializers as _s
 from relic.sga._abc import Archive
 from relic.sga.errors import MismatchError, VersionMismatchError
@@ -67,7 +68,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
         toc_header = self.TocHeader.unpack(stream)
         drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
         names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
-        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream,decompress=decompress)

         if not lazy:
             for file in files:
@@ -79,8 +80,8 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
                     file.data = lazy_info.read(decompress)
                     file._lazy_info = None
         name: str = encoded_name.rstrip(b"\0").decode("utf-16-le")
-        file_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
-        header_md5_helper = core._Md5ChecksumHelper(file_md5, stream, header_pos, header_size, eigen=self.FILE_MD5_EIGEN)
+        file_md5_helper = relic.sga._serializers._Md5ChecksumHelper(file_md5, stream, header_pos, eigen=self.FILE_MD5_EIGEN)
+        header_md5_helper = relic.sga._serializers._Md5ChecksumHelper(header_md5, stream, header_pos, header_size, eigen=self.HEADER_MD5_EIGEN)
         metadata = core.ArchiveMetadata(file_md5_helper, header_md5_helper, unk_a)

         return Archive(name, metadata, drives)
diff --git a/src/relic/sga/v5/core.py b/src/relic/sga/v5/core.py
index 803a337..c7e2698 100644
--- a/src/relic/sga/v5/core.py
+++ b/src/relic/sga/v5/core.py
@@ -2,29 +2,16 @@

 from dataclasses import dataclass
 from datetime import datetime
-from typing import Optional, BinaryIO

 from relic.sga import _abc
 from relic.sga._abc import FileDefABC
-from relic.sga.errors import Version
 from relic.sga._core import VerificationType
+from relic.sga._serializers import _Md5ChecksumHelper
+from relic.sga.errors import Version

 version = Version(5)


-@dataclass
-class _Md5ChecksumHelper:
-    expected: bytes
-    stream: BinaryIO
-    start: int
-    size: Optional[int] = None
-    eigen: Optional[bytes] = None
-
-    def validate(self, stream: BinaryIO = None) -> None:
-        stream = self.stream if stream is None else stream
-        stream.seek(self.start)
-
-
 @dataclass
 class ArchiveMetadata:
     @property
diff --git a/src/relic/sga/v7/_serializers.py b/src/relic/sga/v7/_serializers.py
index 0d9074a..e9ba1a3 100644
--- a/src/relic/sga/v7/_serializers.py
+++ b/src/relic/sga/v7/_serializers.py
@@ -65,7 +65,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
         unk_a, block_size = self.metadata_layout.unpack_stream(stream)
         drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
         names = _s._read_toc_names_as_count(stream, toc_header.name_info, header_pos)
-        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream,decompress=decompress)

         if not lazy:
             for file in files:
diff --git a/src/relic/sga/v9/_serializers.py b/src/relic/sga/v9/_serializers.py
index d7cef8f..13e1a46 100644
--- a/src/relic/sga/v9/_serializers.py
+++ b/src/relic/sga/v9/_serializers.py
@@ -65,7 +65,7 @@ def read(self, stream: BinaryIO, lazy: bool = False, decompress: bool = True) ->
         unk_a, unk_b, block_size = self.metadata_layout.unpack_stream(stream)
         drive_defs, folder_defs, file_defs = _s._read_toc_definitions(stream, toc_header, header_pos, self.DriveDef, self.FolderDef, self.FileDef)
         names = _s._read_toc_names_as_size(stream, toc_header.name_info, header_pos)
-        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream)
+        drives, files = _s._assemble_io_from_defs(drive_defs, folder_defs, file_defs, names, data_pos, stream, decompress=decompress)

         if not lazy:
             for file in files:
diff --git a/src/relic/sga/v9/core.py b/src/relic/sga/v9/core.py
index 9c87b9d..fdce714 100644
--- a/src/relic/sga/v9/core.py
+++ b/src/relic/sga/v9/core.py
@@ -2,12 +2,11 @@

 from dataclasses import dataclass
 from datetime import datetime
-from typing import Optional, BinaryIO

 from relic.sga import _abc
 from relic.sga._abc import FileDefABC
-from relic.sga.errors import Version
 from relic.sga._core import VerificationType
+from relic.sga.errors import Version

 version = Version(9)
diff --git a/tests/relic/sga/datagen/v2.py b/tests/relic/sga/datagen/v2.py
new file mode 100644
index 0000000..5f44f28
--- /dev/null
+++ b/tests/relic/sga/datagen/v2.py
@@ -0,0 +1,36 @@
+from typing import Optional, List, BinaryIO
+from relic.sga._serializers import _Md5ChecksumHelper
+from relic.sga import StorageType
+from relic.sga.protocols import IOContainer
+from relic.sga.v2 import API, core, _serializers as _s
+
+
+def generate_file(name: str, data: bytes, storage: StorageType, compressed: bool = False, parent: Optional[IOContainer] = None) -> API.File:
+    return core.File(name, data, storage, compressed, None, parent, None)
+
+
+def generate_folder(name: str, folders: Optional[List[API.Folder]] = None, files: Optional[List[API.File]] = None, parent: Optional[IOContainer] = None) -> API.Folder:
+    folders = [] if folders is None else folders
+    files = [] if files is None else files
+    return core.Folder(name, folders, files, parent=parent)
+
+
+def generate_drive(name: str, folders: Optional[List[API.Folder]] = None, files: Optional[List[API.File]] = None, alias: str = "data") -> API.Drive:
+    folders = [] if folders is None else folders
+    files = [] if files is None else files
+    return core.Drive(alias, name, folders, files)
+
+
+def generate_archive_meta(stream: BinaryIO, header_pos: int, header_size: int) -> core.ArchiveMetadata:
+    header_helper = _Md5ChecksumHelper(None, None, header_pos, header_size, _s.APISerializers.HEADER_MD5_EIGEN)
+    file_helper = _Md5ChecksumHelper(None, None, header_pos, None, _s.APISerializers.FILE_MD5_EIGEN)
+    # Setup expected MD5 results
+    header_helper.expected = header_helper.read(stream)
+    file_helper.expected = file_helper.read(stream)
+    return core.ArchiveMetadata(file_helper, header_helper)
+
+
+def generate_archive(name: str, meta: Optional[core.ArchiveMetadata] = None, drives: Optional[List[API.Drive]] = None) -> API.Archive:
+    drives = [] if drives is None else drives
+    return core.Archive(name, meta, drives)
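The generators in the new datagen module compose into small fixture trees; a usage sketch (the `tests.…` import path mirrors the repository layout and is an assumption, as is the archive name):

    from relic.sga import StorageType
    from tests.relic.sga.datagen import v2 as datagen

    readme = datagen.generate_file("readme.txt", b"Hello SGA", StorageType.Store)
    docs = datagen.generate_folder("docs", files=[readme])
    drive = datagen.generate_drive("test drive", folders=[docs])
    archive = datagen.generate_archive("DowI Test Data", drives=[drive])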
diff --git a/tests/relic/sga/test_apis.py b/tests/relic/sga/test_apis.py
index ec617b0..eccd298 100644
--- a/tests/relic/sga/test_apis.py
+++ b/tests/relic/sga/test_apis.py
@@ -2,7 +2,7 @@

 from abc import abstractmethod
 from io import BytesIO
 from pathlib import Path
-from typing import Union, Iterable, Tuple
+from typing import Union, Iterable, Tuple, List

 import pytest
@@ -42,26 +42,38 @@ def prepare_for_parametrize(files: Iterable[str]) -> Iterable[Tuple[str]]:
     return [(_,) for _ in files]


+_path = Path(__file__).parent
+# Explicit path locations
 try:
-    path = Path(__file__)
-    path = path.parent / "sources.json"
+    path = _path / "sources.json"
     with path.open() as stream:
         file_sources = json.load(stream)
 except IOError as e:
     file_sources = {}


+# Implicit path locations
+def _update_implicit_file_sources(src_key: str):
+    if src_key not in file_sources:
+        file_sources[src_key] = {}
+    if "dirs" not in file_sources[src_key]:
+        file_sources[src_key]["dirs"] = []
+    dirs:List[str] = file_sources[src_key]["dirs"]
+    dirs.append(str(_path / "test_data" / src_key))
+
+
 def _helper(src_key: str, version: Version):
+    _update_implicit_file_sources(src_key)
     try:
-        local_sources = file_sources.get(src_key,{})
+        local_sources = file_sources.get(src_key, {})
         files = set()
-        for src_dir in local_sources.get("dirs",[]):
+        for src_dir in local_sources.get("dirs", []):
             for f in scan_directory(src_dir, version):
                 files.add(f)
-        for src_file in local_sources.get("files",[]):
+        for src_file in local_sources.get("files", []):
             files.add(src_file)
         return prepare_for_parametrize(files)
-    except Exception as e:
+    except IOError as e:
         return tuple()
diff --git a/test_data/sga/archive-v2_0.sga b/tests/relic/sga/test_data/v2/DowI Test Data.sga
similarity index 100%
rename from test_data/sga/archive-v2_0.sga
rename to tests/relic/sga/test_data/v2/DowI Test Data.sga
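The loader above implies a sources.json of the following shape, shown as the Python dict it parses into; both keys are optional thanks to the .get(...) defaults, and the implicit test_data directory is appended regardless:

    file_sources = {
        "v2": {
            "dirs": ["extra/dirs/to/scan/for/v2/archives"],
            "files": ["an/explicit/archive.sga"],
        }
    }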
From f0a9256729761b587bcb19f11a4131db1ff9dd03 Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Sun, 12 Jun 2022 15:14:57 -0700
Subject: [PATCH 18/19] More MYPY fixes

---
 src/relic/sga/__init__.py     |  2 --
 src/relic/sga/_abc.py         | 19 +++++++++++++------
 src/relic/sga/_serializers.py | 31 +++++++++++++++++++++++--------
 src/relic/sga/errors.py       | 12 ++++++++++--
 src/relic/sga/protocols.py    | 26 ++++++++++++--------------
 src/relic/sga/v2/__init__.py  |  4 ++--
 src/relic/sga/v2/core.py      |  3 ++-
 7 files changed, 62 insertions(+), 35 deletions(-)

diff --git a/src/relic/sga/__init__.py b/src/relic/sga/__init__.py
index f9f73c6..af0b51b 100644
--- a/src/relic/sga/__init__.py
+++ b/src/relic/sga/__init__.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from relic.sga._apis import apis as APIs
 from relic.sga._core import Version, MagicWord, StorageType, VerificationType
diff --git a/src/relic/sga/_abc.py b/src/relic/sga/_abc.py
index 51dcb32..9aa59a3 100644
--- a/src/relic/sga/_abc.py
+++ b/src/relic/sga/_abc.py
@@ -6,12 +6,11 @@

 from dataclasses import dataclass
 from io import BytesIO
 from pathlib import PurePath
-from typing import List, Optional, Tuple, BinaryIO, Type, Generic
+from typing import List, Optional, Tuple, BinaryIO, Type, Generic, TypeVar

 from relic.sga import protocols as p
-from relic.sga.protocols import TFileMetadata, IONode, IOWalk, TMetadata, TDrive, TArchive, TFolder, TFile, StreamSerializer, IOContainer
-from relic.sga._core import StorageType
-from relic.sga.errors import Version
+from relic.sga._core import StorageType, Version
+from relic.sga.protocols import IONode, IOWalk, IOContainer


 def _build_io_path(name: str, parent: Optional[p.IONode]) -> PurePath:
@@ -21,6 +20,14 @@ def _build_io_path(name: str, parent: Optional[p.IONode]) -> PurePath:
     return PurePath(name)


+TFile = TypeVar("TFile", bound=p.File)
+TFolder = TypeVar("TFolder", bound=p.Folder)
+TDrive = TypeVar("TDrive", bound=p.Drive)
+TArchive = TypeVar("TArchive", bound=p.Archive)
+TMetadata = TypeVar("TMetadata")
+TFileMetadata = TypeVar("TFileMetadata")
+
+
 @dataclass
 class _FileLazyInfo:
     jump_to: int
@@ -67,7 +74,7 @@ class FileDefABC:

 @dataclass
-class File(p.File[TFileMetadata]):
+class File(Generic[TFileMetadata], p.File[TFileMetadata]):
     name: str
     _data: Optional[bytes]
     storage_type: StorageType
@@ -172,7 +179,7 @@ def walk(self) -> IOWalk:

 # for good typing; manually define dataclass attributes in construct
 # it sucks, but good typing is better than no typing
-class API(p.API, ABC):
+class API(Generic[TArchive, TDrive, TFolder, TFile], p.API[TArchive, TDrive, TFolder, TFile], ABC):
     def __init__(self, version: Version, archive: Type[TArchive], drive: Type[TDrive], folder: Type[TFolder], file: Type[TFile], serializer: APISerializer):
         self.version = version
         self.Archive = archive
diff --git a/src/relic/sga/_serializers.py b/src/relic/sga/_serializers.py
index 774dc5a..2d7c5e3 100644
--- a/src/relic/sga/_serializers.py
+++ b/src/relic/sga/_serializers.py
@@ -27,11 +27,25 @@ def __init__(self, layout: Struct):
         self.layout = layout

     def unpack(self, stream: BinaryIO) -> TocHeader:
-        drive_pos, drive_count, folder_pos, folder_count, file_pos, file_count, name_pos, name_count = self.layout.unpack_stream(stream)
+        drive_pos, \
+        drive_count, \
+        folder_pos, \
+        folder_count, \
+        file_pos, \
+        file_count, \
+        name_pos, \
+        name_count = self.layout.unpack_stream(stream)
         return TocHeader((drive_pos, drive_count), (folder_pos, folder_count), (file_pos, file_count), (name_pos, name_count))

     def pack(self, stream: BinaryIO, value: TocHeader) -> int:
-        args = value.drive_info[0], value.drive_info[1], value.folder_info[0], value.folder_info[1], value.file_info[0], value.file_info[1], value.name_info[0], value.name_info[1]
+        args = value.drive_info[0], \
+               value.drive_info[1], \
+               value.folder_info[0], \
+               value.folder_info[1], \
+               value.file_info[0], \
+               value.file_info[1], \
+               value.name_info[0], \
+               value.name_info[1]
         return self.layout.pack_stream(stream, *args)
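The reflowed TocHeader serializer is the usual Struct-backed round-trip; a sketch, assuming the layout is eight little-endian uint32s (four (pos, count) pairs):

    from io import BytesIO
    from serialization_tools.structx import Struct

    layout = Struct("<8L")  # assumed v2-style TOC header layout
    buffer = BytesIO()
    layout.pack_stream(buffer, 180, 1, 200, 4, 260, 9, 400, 14)
    buffer.seek(0)
    drive_pos, drive_count, *rest = layout.unpack_stream(buffer)
    assert (drive_pos, drive_count) == (180, 1)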
@@ -71,7 +85,8 @@ def pack(self, stream: BinaryIO, value: FolderDef) -> int:
         return self.layout.pack_stream(stream, *args)


-def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderDef], file_defs: List[FileDef], names: Dict[int, str], data_pos: int, stream: BinaryIO, build_file_meta: Optional[Callable[[FileDef], TFileMetadata]] = None, decompress:bool=False) -> Tuple[List[_abc.Drive], List[_abc.File]]:
+def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderDef], file_defs: List[FileDef], names: Dict[int, str], data_pos: int, stream: BinaryIO, build_file_meta: Optional[Callable[[FileDef], TFileMetadata]] = None,
+                           decompress: bool = False) -> Tuple[List[_abc.Drive], List[_abc.File]]:
     all_files: List[TFile] = []
     drives: List[TDrive] = []
     for drive_def in drive_defs:
@@ -84,7 +99,7 @@ def _assemble_io_from_defs(drive_defs: List[DriveDef], folder_defs: List[FolderD
             metadata = build_file_meta(file_def) if build_file_meta is not None else None
             lazy_info = _FileLazyInfo(data_pos + file_def.data_pos, file_def.length_in_archive, file_def.length_on_disk, stream, decompress)
             file_compressed = file_def.storage_type != StorageType.Store
-            file = _abc.File(name=name,_data=None,storage_type=file_def.storage_type,_is_compressed=file_compressed,metadata=metadata, _lazy_info=lazy_info)
+            file = _abc.File(name=name, _data=None, storage_type=file_def.storage_type, _is_compressed=file_compressed, metadata=metadata, _lazy_info=lazy_info)
             files.append(file)

         folders: List[_abc.Folder] = []
@@ -202,12 +217,12 @@ class _Md5ChecksumHelper:
     size: Optional[int] = None
     eigen: Optional[bytes] = None

-    def read(self,stream:Optional[BinaryIO] = None) -> bytes:
+    def read(self, stream: Optional[BinaryIO] = None) -> bytes:
         stream = self.stream if stream is None else stream
         stream.seek(self.start)
-        md5 = hashlib.md5(self.eigen)
+        md5 = hashlib.md5(self.eigen) if self.eigen is not None else hashlib.md5()
         # Safer for large files to read chunked
-        for chunk in _chunked_read(stream,self.size,256*KiB):
+        for chunk in _chunked_read(stream, self.size, 256 * KiB):
             md5.update(chunk)
         md5_str = md5.hexdigest()
         return bytes.fromhex(md5_str)
@@ -215,4 +230,4 @@ def read(self,stream:Optional[BinaryIO] = None) -> bytes:
     def validate(self, stream: Optional[BinaryIO] = None) -> None:
         result = self.read(stream)
         if self.expected != result:
-            raise MD5MismatchError(result,self.expected)
+            raise MD5MismatchError(result, self.expected)
diff --git a/src/relic/sga/errors.py b/src/relic/sga/errors.py
index afd833b..aa1d970 100644
--- a/src/relic/sga/errors.py
+++ b/src/relic/sga/errors.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 from typing import List, Any

 from relic.sga._core import Version
@@ -49,3 +47,13 @@ def str_ver(v: Version) -> str:  # dont use str(version); too verbose
         allowed_str = [str_ver(_) for _ in self.allowed]
         return f"Version `{str_ver(self.received)}` is not supported. Versions supported: `{allowed_str}`"
+
+
+#
+__all__ = [
+    "_print_mismatch",
+    "MismatchError",
+    "VersionMismatchError",
+    "MD5MismatchError",
+    "VersionNotSupportedError"
+]
diff --git a/src/relic/sga/protocols.py b/src/relic/sga/protocols.py
index 8c55657..fd6a9fd 100644
--- a/src/relic/sga/protocols.py
+++ b/src/relic/sga/protocols.py
@@ -3,20 +3,18 @@

 from contextlib import contextmanager
 from pathlib import PurePath
 from types import ModuleType
-from typing import TypeVar, Protocol, List, Optional, ForwardRef, Tuple, Iterable, BinaryIO, Type, runtime_checkable
+from typing import TypeVar, Protocol, List, Optional, ForwardRef, Tuple, Iterable, BinaryIO, Type, runtime_checkable, Sequence

 from relic.sga._core import StorageType, Version

-FileFwd = ForwardRef("File")
-FolderFwd = ForwardRef("Folder")
-DriveFwd = ForwardRef("Drive")
-ArchiveFwd = ForwardRef("Archive")
-TFile = TypeVar("TFile", bound=FileFwd)
-TFolder = TypeVar("TFolder", bound=FolderFwd)
-TDrive = TypeVar("TDrive", bound=DriveFwd)
-TArchive = TypeVar("TArchive", bound=ArchiveFwd)
+TFile = TypeVar("TFile")
+TFolder = TypeVar("TFolder")
+TDrive = TypeVar("TDrive")
+TArchive = TypeVar("TArchive")
 TMetadata = TypeVar("TMetadata")
 TFileMetadata = TypeVar("TFileMetadata")
+TFile_co = TypeVar("TFile_co", covariant=True)
+TFolder_co = TypeVar("TFolder_co", covariant=True)

 T = TypeVar("T")
@@ -40,16 +38,16 @@ class IONode(Protocol):
     parent: Optional[IOContainer]


-class IOContainer(IONode, Protocol):
-    sub_folders: List[Folder]
-    files: List[File]
+class IOContainer(IONode, Protocol[TFolder, TFile]):
+    sub_folders: List[TFolder]
+    files: List[TFile]


-IOWalkStep = Tuple[IOContainer, List[FolderFwd], List[FileFwd]]
+IOWalkStep = Tuple[IOContainer, Sequence[TFolder_co], Sequence[TFile_co]]
 IOWalk = Iterable[IOWalkStep]


-class IOWalkable(Protocol[TFolder, TFile]):
+class IOWalkable(Protocol[TFolder_co, TFile_co]):
     def walk(self) -> IOWalk:
         raise NotImplementedError
diff --git a/src/relic/sga/v2/__init__.py b/src/relic/sga/v2/__init__.py
index f5d47c1..04c1ba2 100644
--- a/src/relic/sga/v2/__init__.py
+++ b/src/relic/sga/v2/__init__.py
@@ -1,4 +1,4 @@
-from relic.sga import _abc
+from relic.sga import _abc, protocols
 from relic.sga.v2._serializers import APISerializers
 from relic.sga.v2.core import Archive, Drive, Folder, File, ArchiveMetadata, version
@@ -9,7 +9,7 @@ def _create_api():
     return api


-API = _create_api()
+API: protocols.API[Archive, Drive, Folder, File] = _create_api()

 __all__ = [
     "Archive",
diff --git a/src/relic/sga/v2/core.py b/src/relic/sga/v2/core.py
index c0b3e3f..50f4f35 100644
--- a/src/relic/sga/v2/core.py
+++ b/src/relic/sga/v2/core.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 from dataclasses import dataclass
+from typing import Type

 from relic.sga import _abc
 from relic.sga._serializers import _Md5ChecksumHelper
@@ -23,7 +24,7 @@ def header_md5(self) -> bytes:
     _header_md5: _Md5ChecksumHelper


-Archive = _abc.Archive[ArchiveMetadata]
+Archive: Type[_abc.Archive[ArchiveMetadata]] = _abc.Archive
 Folder = _abc.Folder
 File = _abc.File
 Drive = _abc.Drive
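Why protocols.py needs both flavors of TypeVar above: the mutable List fields on IOContainer require invariant parameters, while walk()'s read-only Sequence results can vary covariantly. A minimal sketch of the covariant case:

    from typing import Iterable, Protocol, Sequence, TypeVar

    TItem_co = TypeVar("TItem_co", covariant=True)

    class Walkable(Protocol[TItem_co]):
        def walk(self) -> Iterable[Sequence[TItem_co]]: ...

    class Base: ...
    class Derived(Base): ...

    class DerivedWalker:
        def walk(self) -> Iterable[Sequence[Derived]]:
            return [[Derived()]]

    walker: Walkable[Base] = DerivedWalker()  # accepted because TItem_co is covariant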
From 27eb8c3deae1ee6e180f1d7ba3add75ee6c56663 Mon Sep 17 00:00:00 2001
From: Marcus Kertesz
Date: Sun, 12 Jun 2022 15:32:29 -0700
Subject: [PATCH 19/19] Update setup.cfg

Fix setup.cfg not using find_namespace for new 'relic' namespace package
---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index aa09d57..0c4fc20 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,7 +21,7 @@ classifiers =
 include_package_data = True
 package_dir =
     = src
-packages = find:
+packages = find_namespace:
 python_requires = >=3.9
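With packages = find_namespace: in place, an installed relic tree (no relic/__init__.py) resolves as a PEP 420 namespace package; a quick sanity check:

    import relic

    print(getattr(relic, "__file__", None))  # None/absent: no __init__.py on disk
    print(list(relic.__path__))              # one entry per distribution providing relic.*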