From befdd6eabfb95f83e8ae91f66f446df8a8354068 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 18 Jun 2026 12:47:16 -0400 Subject: [PATCH] fix: prefer entity key in TOML file when restoring Prior to this commit, we would always assume the entity key would match the directory naming, but that is not a hard requirement. Furthermore, it looks like something in the backup code is adding hashes to the directories unnecessarily, so this was coming up quite often. --- .../applets/backup_restore/zipper.py | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/openedx_content/applets/backup_restore/zipper.py b/src/openedx_content/applets/backup_restore/zipper.py index 3261836c3..b7f3fc921 100644 --- a/src/openedx_content/applets/backup_restore/zipper.py +++ b/src/openedx_content/applets/backup_restore/zipper.py @@ -4,6 +4,7 @@ """ import hashlib import time +import tomllib import zipfile from collections import defaultdict from dataclasses import asdict, dataclass @@ -282,7 +283,7 @@ def create_zip(self, path: str) -> None: Exception: If the learning package cannot be found or if the zip creation fails. """ - with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf: + with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf: # Add the package.toml file package_toml_content: str = toml_learning_package( self.learning_package, self.utc_now, user=self.user, origin_server=self.origin_server @@ -1060,7 +1061,13 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]: "collections": [], } - for path in file_paths: + # The ordering of the file processing is important because we need to + # ensure that TOML files for a given component are processed before the + # static files for that component. 
+ + comp_paths_to_keys = {} + + for path in sorted(file_paths): if path.endswith("/"): # Skip directories continue @@ -1073,21 +1080,40 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]: if path.endswith(".toml"): # Component entity TOML files organized["components"].append(path) + component_toml_str = self._read_file_from_zip(path) + component_toml = tomllib.loads(component_toml_str) + entity_key = component_toml['entity']['key'] + comp_path = path[:-5] # removes the ".toml" at the end + comp_paths_to_keys[comp_path] = entity_key + else: # Component static files # Path structure: entities/<namespace>/<type>/<component_id>/component_versions/<version>/static/... # Example: entities/xblock.v1/html/my_component_123456/component_versions/v1/static/... - component_key = Path(path).parts[1:4] # e.g., ['xblock.v1', 'html', 'my_component_123456'] + + # e.g. 'entities/xblock.v1/html/my_component_123456' + component_root_path = '/'.join(Path(path).parts[0:4]) + + try: + component_identifier = comp_paths_to_keys[component_root_path] + except KeyError: + self.errors.append( + { + "file": path, + "errors": "Could not find destination entity key for component static file." + } + ) + continue + num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1" # e.g., 'v1' - if len(component_key) == 3: - component_identifier = ":".join(component_key) - component_identifier += f":{num_version}" - organized["component_static_files"][component_identifier].append(path) - else: - self.errors.append({"file": path, "errors": "Invalid component static file path structure."}) + + component_identifier += f":{num_version}" + organized["component_static_files"][component_identifier].append(path) + elif path.startswith("collections/") and path.endswith(".toml"): # Collection TOML files organized["collections"].append(path) + return organized def _get_versions_to_write(