import importlib.util
import sys
import tempfile
import types
import unittest
import stat
import zipfile
from pathlib import Path
# Repository root: three levels above the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Directory added to sys.path below so its helper modules can be imported.
TOOLS_TESTS_DIR = REPO_ROOT / "tools" / "scripts" / "tests"
if str(TOOLS_TESTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_TESTS_DIR))
# Must come after the sys.path setup above; presumably symlink_test_utils
# lives in TOOLS_TESTS_DIR -- confirm.
from symlink_test_utils import symlink_or_skip
# Register empty stand-in modules for defusedxml and defusedxml.minidom.
# setdefault leaves any entry already present in sys.modules untouched.
# NOTE(review): presumably the scripts under test import defusedxml.minidom
# at module level and these stubs let them load without the real package.
defusedxml = types.ModuleType("defusedxml")
defusedxml_minidom = types.ModuleType("defusedxml.minidom")
defusedxml.minidom = defusedxml_minidom
sys.modules.setdefault("defusedxml", defusedxml)
sys.modules.setdefault("defusedxml.minidom", defusedxml_minidom)
def load_module(relative_path: str, module_name: str) -> types.ModuleType:
    """Load a Python file from the repository as a standalone module.

    Args:
        relative_path: Location of the file, joined onto ``REPO_ROOT``.
        module_name: Name given to the module spec and the module object.

    Returns:
        The module after its code has been executed. This function does not
        add it to ``sys.modules``.

    Raises:
        ImportError: If no import spec or loader can be built for the file.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # Validate before module_from_spec, which dereferences the spec; an
    # explicit raise also survives ``python -O`` (asserts do not).
    if spec is None or spec.loader is None:
        raise ImportError(
            f"Cannot create an import spec for {module_name!r} from {module_path}"
        )
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
class OfficeUnpackSecurityTests(unittest.TestCase):
    """Security checks for the docx/pptx OOXML ``unpack.py`` and ``pack.py`` scripts.

    Each test runs once per script inside its own subtest. The script is
    loaded inside the subtest so that a load failure is reported against that
    script and does not prevent the remaining scripts from being exercised.
    """

    def test_extract_archive_safely_blocks_zip_slip(self):
        """An archive entry named ``../escape.txt`` must raise ``ValueError``."""
        for relative_path, module_name in [
            ("skills/docx-official/ooxml/scripts/unpack.py", "docx_unpack"),
            ("skills/pptx-official/ooxml/scripts/unpack.py", "pptx_unpack"),
        ]:
            with self.subTest(module=relative_path):
                module = load_module(relative_path, module_name)
                with tempfile.TemporaryDirectory() as temp_dir:
                    temp_path = Path(temp_dir)
                    archive_path = temp_path / "payload.zip"
                    output_dir = temp_path / "output"

                    with zipfile.ZipFile(archive_path, "w") as archive:
                        # Traversal entry that would land beside output_dir.
                        archive.writestr("../escape.txt", "escape")
                        archive.writestr("word/document.xml", "")

                    with self.assertRaises(ValueError):
                        module.extract_archive_safely(archive_path, output_dir)

                    # Nothing may have been written outside output_dir.
                    self.assertFalse((temp_path / "escape.txt").exists())

    def test_extract_archive_safely_blocks_zip_symlinks(self):
        """An archive entry flagged as a symbolic link must raise ``ValueError``."""
        for relative_path, module_name in [
            ("skills/docx-official/ooxml/scripts/unpack.py", "docx_unpack_symlink"),
            ("skills/pptx-official/ooxml/scripts/unpack.py", "pptx_unpack_symlink"),
        ]:
            with self.subTest(module=relative_path):
                module = load_module(relative_path, module_name)
                with tempfile.TemporaryDirectory() as temp_dir:
                    temp_path = Path(temp_dir)
                    archive_path = temp_path / "payload.zip"
                    output_dir = temp_path / "output"

                    with zipfile.ZipFile(archive_path, "w") as archive:
                        symlink_info = zipfile.ZipInfo("word/link")
                        # create_system 3 marks the entry as Unix-made; the
                        # file mode goes in the high 16 bits of external_attr.
                        symlink_info.create_system = 3
                        symlink_info.external_attr = (stat.S_IFLNK | 0o777) << 16
                        # The entry's data is the link target.
                        archive.writestr(symlink_info, "../escape.txt")
                        archive.writestr("word/document.xml", "")

                    with self.assertRaises(ValueError):
                        module.extract_archive_safely(archive_path, output_dir)

                    self.assertFalse((temp_path / "escape.txt").exists())

    def test_extract_archive_safely_blocks_high_compression_ratio(self):
        """A highly compressible entry must raise once the ratio limit is lowered."""
        for relative_path, module_name in [
            ("skills/docx-official/ooxml/scripts/unpack.py", "docx_unpack_ratio"),
            ("skills/pptx-official/ooxml/scripts/unpack.py", "pptx_unpack_ratio"),
        ]:
            with self.subTest(module=relative_path):
                module = load_module(relative_path, module_name)
                # Override on the freshly loaded module only; presumably this
                # is the limit extract_archive_safely enforces -- confirm.
                module.MAX_COMPRESSION_RATIO = 10
                with tempfile.TemporaryDirectory() as temp_dir:
                    temp_path = Path(temp_dir)
                    archive_path = temp_path / "payload.zip"

                    with zipfile.ZipFile(
                        archive_path, "w", zipfile.ZIP_DEFLATED
                    ) as archive:
                        # 100,000 identical bytes deflate to a tiny payload.
                        archive.writestr("word/document.xml", "A" * 100_000)

                    with self.assertRaises(ValueError):
                        module.extract_archive_safely(
                            archive_path, temp_path / "output"
                        )

    def test_pack_document_blocks_input_symlinks(self):
        """A symlink inside the input tree must make validation raise ``ValueError``."""
        for relative_path, module_name in [
            ("skills/docx-official/ooxml/scripts/pack.py", "docx_pack"),
            ("skills/pptx-official/ooxml/scripts/pack.py", "pptx_pack"),
        ]:
            with self.subTest(module=relative_path):
                module = load_module(relative_path, module_name)
                with tempfile.TemporaryDirectory() as temp_dir:
                    temp_path = Path(temp_dir)
                    input_dir = temp_path / "input"
                    outside = temp_path / "outside.txt"

                    input_dir.mkdir()
                    outside.write_text("secret", encoding="utf-8")
                    # Presumably creates input/leak.txt as a link to the
                    # outside file, or skips where symlinks are unavailable.
                    symlink_or_skip(self, outside, input_dir / "leak.txt")

                    with self.assertRaises(ValueError):
                        module.validate_input_tree(input_dir)
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()