可执行文件提取打包py脚本

用于提取游戏目录下的可执行文件，并打包为ZIP文件。

该ZIP包中还有一个文件清单 _MANIFEST.yaml:

files:
- path: Game.exe
  size: 1.5MB
  blake3: cddbd27cb70428a469cad51390dc6a2072042af7b6862b18085bfb75f39ba14a
  sha256: 487bd28f3d0b43ed9827ba519d6d113c4f31059bd62b4492da586c7bc82a9474
- path: d3dcompiler_47.dll
  size: 3.5MB
  blake3: db9aabe06ce1bded55b09a7fbc550b1f67a9f87c937294df68142de4c8afaec0
  sha256: 381ec497d7d40b83616b0e82e15c597d04433acc20e94ebe5611f954b2e5309b
- path: ffmpeg.dll
  size: 2.0MB
  blake3: 0fb2ffa6482b366d3e8ca5fe703580c478c2f3215228778d4dfa81084e9f5580
  sha256: 0e09a91038273416e9c759a74d7febabcabd815aff8090bae384c2402f3cff27
- path: libEGL.dll
  size: 77.0KB
  blake3: 68023d146f351f8f9d57038a94b674eb38038da9278e729025f91436b6f2bd62
  sha256: 3237951e48aa59bb3ecca077f4d075cc7b414ad69e09d8e006eb36e1b38d86ab
- path: libGLESv2.dll
  size: 3.6MB
  blake3: ddc87797fbcde80ddfd7ab19e7ac9308606fe584756a2af52fa0f173f6141f17
  sha256: 1c729f6d3a8e50960a5cf9893d60c2f5fdcb1cca76baca888e3e4b8c7a742641

你可以将这个ZIP压缩包上传到病毒扫描中心。将扫描结果作为凭证。

只要源文件内容和修改时间不变，生成的 ZIP 文件哈希值就会保持一致。

PS: py .\executable_extractor.py .\花葬巫女サクヤ\ -o test.zip
UNIX: python executable_extractor.py ./花葬巫女サクヤ -o test2.zip

--- Output File ---
Path:   test.zip
Size:   48.3MB
BLAKE3: 061ff8611c843c724a4dea91e5b8ce6bf8848273f4b95622fc147238d48c4fc1
SHA256: 27d1c4f05eb385b7589847440b97e8be1512869b6c08a0cd463dabc6127eddaa

--- Output File ---
Path:   test2.zip
Size:   48.3MB
BLAKE3: 061ff8611c843c724a4dea91e5b8ce6bf8848273f4b95622fc147238d48c4fc1
SHA256: 27d1c4f05eb385b7589847440b97e8be1512869b6c08a0cd463dabc6127eddaa

---

python executable_extractor.py --diff test3.zip test4.zip

Modified files:
  Game.exe
    Size: 1.5MB -> 2.0MB
    BLAKE3: cddbd27cb70428a469cad51390dc6a2072042af7b6862b18085bfb75f39ba14a -> 1aaf8de96964ceb42661cd55bdc6d6416da3ee46dc2fd640061bc029327e312d
  d3dcompiler_47.dll
    Size: 3.5MB -> 4.3MB
    BLAKE3: db9aabe06ce1bded55b09a7fbc550b1f67a9f87c937294df68142de4c8afaec0 -> 6091f4ef003e35a8a0aeeeae6e453ffa04bd16a4986bc965c6e75568e505823b
  ffmpeg.dll
    Size: 2.0MB -> 1.7MB
    BLAKE3: 0fb2ffa6482b366d3e8ca5fe703580c478c2f3215228778d4dfa81084e9f5580 -> b88a0ea370282ec50aa8b6dfb0a21b1a38ccaa52d984a7a43b577c4824853513

---

更新内容：
- [x] 实现基础功能
- [x] 解决轻微瑕疵（Lint语法警告，函数复杂度大于15）
- [x] 解决跨平台哈希计算一致性问题
- [x] 新增-d/--diff比较参数（比较清单文件）
- [x] 使用blake3算法代替md5算法
- [x] 修改-d/--diff功能（比较清单和整个ZIP包）
- [x] 根据算法设置chunk size，使用blake3.AUTO来设置最大线程 <-
- [ ] 新功能/修复问题

md5算法版本：网页链接(vikingfile.com)

提示：代码中导入了yaml、blake3模块，运行脚本之前需要自行安装该模块

脚本源码(-h查看帮助)：网页链接(vikingfile.com)

#!/usr/bin/env python3

import argparse
import hashlib
import mmap
import os
import sys
import time
import zipfile
from datetime import datetime
from pathlib import Path

import yaml
from blake3 import blake3

# 配置常量
SMALL_FILE_THRESHOLD = 64 * 1024  # 64KB - 小文件一次读取
MMAP_THRESHOLD = 10 * 1024 * 1024  # 10MB - 大文件使用 mmap
DEFAULT_CHUNK_SIZE = 1024 * 1024  # 1MB
BLAKE3_CHUNK_SIZE = 4 * 1024 * 1024  # 4MB

# 可执行文件扩展名（按类别分组）
EXTENSIONS = {
    # Windows 可执行文件和库
    "windows": {
        ".exe",
        ".dll",
        ".sys",
        ".drv",
        ".ocx",
        ".cpl",
        ".scr",
        ".com",
        ".pif",
        ".msi",
        ".msix",
        ".msp",
        ".mst",
        ".gadget",
        ".ax",
        ".acm",
        ".efi",
        ".mui",
        ".tsp",
        ".fon",
    },
    # Windows 脚本
    "scripts_win": {
        ".bat",
        ".cmd",
        ".ps1",
        ".psm1",
        ".psd1",
        ".vbs",
        ".vbe",
        ".js",
        ".jse",
        ".wsf",
        ".wsh",
        ".wsc",
        ".hta",
    },
    # Unix/Linux 可执行文件和库
    "linux": {
        ".sh",
        ".bash",
        ".zsh",
        ".ksh",
        ".csh",
        ".fish",
        ".so",
        ".ko",
        ".bin",
        ".run",
        ".out",
        ".elf",
        ".appimage",
    },
    # macOS
    "macos": {
        ".dylib",
        ".bundle",
        ".kext",
        ".plugin",
        ".pkg",
        ".mpkg",
        ".dmg",
        ".command",
        ".tool",
        ".action",
        ".workflow",
        ".scpt",
        ".scptd",
    },
    # 跨平台脚本和字节码
    "cross_platform": {
        ".py",
        ".pyc",
        ".pyw",
        ".pyz",
        ".pyzw",
        ".pl",
        ".pm",
        ".ph",
        ".plx",
        ".rb",
        ".rbw",
        ".php",
        ".php3",
        ".php4",
        ".php5",
        ".phtml",
        ".lua",
        ".tcl",
        ".awk",
        ".sed",
        ".jar",
        ".class",
        ".war",
        ".ear",
    },
    # Office 宏文件（可执行恶意代码）
    "office_macro": {
        ".docm",
        ".dotm",
        ".xlsm",
        ".xltm",
        ".xlam",
        ".xll",
        ".pptm",
        ".potm",
        ".ppam",
        ".ppsm",
        ".sldm",
        ".accde",
        ".accdr",
        ".accda",
        ".mde",
        ".ade",
        ".adp",
    },
    # 其他潜在危险文件
    "other": {
        ".lnk",
        ".url",
        ".reg",
        ".inf",
        ".chm",
        ".hlp",
        ".msc",
        ".crt",
        ".cer",
        ".application",
        ".appref-ms",
        ".settingcontent-ms",
        ".vhd",
        ".vhdx",
        ".iso",
        ".img",
    },
}

# 可执行文件扩展名（用于判断是否设置可执行权限）
EXECUTABLE_EXTENSIONS = {
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    ".sh",
    ".bash",
    ".zsh",
    ".ksh",
    ".csh",
    ".py",
    ".pl",
    ".rb",
    ".bat",
    ".cmd",
    ".ps1",
    ".bin",
    ".run",
    ".out",
    ".elf",
    ".appimage",
    ".command",
    ".tool",
}


def get_all_extensions(categories=None):
    """获取指定类别的所有扩展名"""
    if categories is None:
        categories = EXTENSIONS.keys()

    result = set()
    for cat in categories:
        if cat in EXTENSIONS:
            result.update(EXTENSIONS[cat])
    return result


def format_size(size):
    """格式化文件大小"""
    for unit in ["B", "KB", "MB", "GB"]:
        if size < 1024:
            return f"{size:.1f}{unit}"
        size /= 1024
    return f"{size:.1f}TB"


def compute_hash(filepath, algorithm="blake3"):
    """高性能文件哈希计算（根据文件大小自动选择最优策略）"""
    if algorithm == "blake3" and blake3 is not None:
        hasher = blake3(max_threads=blake3.AUTO)
        chunk_size = BLAKE3_CHUNK_SIZE
    else:
        hasher = hashlib.new(algorithm)
        chunk_size = DEFAULT_CHUNK_SIZE

    try:
        file_size = os.path.getsize(filepath)

        # 策略1: 小文件 - 一次性读取
        if file_size <= SMALL_FILE_THRESHOLD:
            with open(filepath, "rb") as f:
                hasher.update(f.read())

        # 策略2: 大文件 - 使用 mmap（零拷贝，利用 OS 页缓存）
        elif file_size >= MMAP_THRESHOLD:
            with open(filepath, "rb") as f:
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                    hasher.update(mm)

        # 策略3: 中等文件 - 分块读取
        else:
            with open(filepath, "rb") as f:
                while chunk := f.read(chunk_size):
                    hasher.update(chunk)

        return hasher.hexdigest()

    except (OSError, IOError, ValueError):
        return None


def compute_file_hashes(filepath):
    """高性能同时计算 BLAKE3 和 SHA256"""
    blake3_hasher = blake3(max_threads=blake3.AUTO)
    sha256_hasher = hashlib.sha256()

    try:
        file_size = os.path.getsize(filepath)

        if file_size <= SMALL_FILE_THRESHOLD:
            with open(filepath, "rb") as f:
                data = f.read()
                blake3_hasher.update(data)
                sha256_hasher.update(data)

        elif file_size >= MMAP_THRESHOLD:
            with open(filepath, "rb") as f:
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                    # mmap 支持 buffer protocol，可直接传入
                    blake3_hasher.update(mm)
                    sha256_hasher.update(mm)

        else:
            with open(filepath, "rb") as f:
                while chunk := f.read(DEFAULT_CHUNK_SIZE):
                    blake3_hasher.update(chunk)
                    sha256_hasher.update(chunk)

        return blake3_hasher.hexdigest(), sha256_hasher.hexdigest()

    except (OSError, IOError, ValueError):
        return None, None


def is_elf_or_script(filepath):
    """检查文件是否是ELF二进制或脚本（通过文件头判断）"""
    try:
        with open(filepath, "rb") as f:
            header = f.read(4)
            if header[:4] == b"\x7fELF":
                return True
            if header[:2] == b"#!":
                return True
    except (OSError, IOError):
        pass
    return False


def is_executable_extension(filepath):
    """判断文件扩展名是否表示可执行文件"""
    ext = filepath.suffix.lower()
    return ext in EXECUTABLE_EXTENSIONS


def should_include(filepath, extensions, check_executable=False):
    """判断文件是否应该被包含"""
    ext = filepath.suffix.lower()
    if ext in extensions:
        return True

    # Unix下检查无扩展名但有执行权限的文件
    if check_executable and sys.platform != "win32":
        if not ext and os.access(filepath, os.X_OK):
            return is_elf_or_script(filepath)

    return False


def scan_directory(
    directory,
    extensions,
    recursive=True,
    exclude_dirs=None,
    max_size=None,
    check_exec=False,
):
    """扫描目录，返回匹配的文件列表"""
    directory = Path(directory).resolve()
    exclude_dirs = exclude_dirs or set()
    found = []
    stats = {"dirs": 0, "files": 0, "matched": 0, "skipped_size": 0, "skipped_err": 0}

    def scan(path):
        stats["dirs"] += 1
        entries = get_directory_entries(path)
        if entries is None:
            return

        for entry in entries:
            if entry.is_dir():
                process_directory(entry, recursive, exclude_dirs, scan)
            elif entry.is_file():
                process_file(
                    entry, extensions, check_exec, max_size, directory, found, stats
                )

    scan(directory)
    return found, stats


def get_directory_entries(path):
    """获取目录中的所有条目"""
    try:
        return list(path.iterdir())
    except PermissionError:
        print(f"WARN: permission denied: {path}", file=sys.stderr)
        return None


def process_directory(entry, recursive, exclude_dirs, scan_func):
    """处理目录条目"""
    if recursive and entry.name not in exclude_dirs:
        scan_func(entry)


def process_file(entry, extensions, check_exec, max_size, directory, found, stats):
    """处理文件条目"""
    stats["files"] += 1

    if not should_include(entry, extensions, check_exec):
        return

    file_info = get_file_info(entry)
    if file_info is None:
        stats["skipped_err"] += 1
        return

    size, mtime = file_info

    if should_skip_by_size(size, max_size):
        stats["skipped_size"] += 1
        return

    add_file_to_results(entry, directory, size, mtime, found, stats)


def get_file_info(entry):
    """获取文件的大小和修改时间"""
    try:
        stat_info = entry.stat()
        return stat_info.st_size, stat_info.st_mtime
    except OSError:
        return None


def should_skip_by_size(size, max_size):
    """检查文件是否应该被跳过（因为大小限制）"""
    return max_size and size > max_size


def add_file_to_results(entry, directory, size, mtime, found, stats):
    """将文件添加到结果列表"""
    rel_path = entry.relative_to(directory)
    found.append(
        {
            "path": entry,
            "rel_path": rel_path,
            "size": size,
            "mtime": mtime,
            "ext": entry.suffix.lower(),
        }
    )
    stats["matched"] += 1


def mtime_to_ziptime(mtime):
    """将mtime转换为ZIP时间元组"""
    t = time.localtime(mtime)
    return (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)


def create_zip(files, output_path, compute_hashes=True):
    """创建ZIP文件（保证幂等性和跨平台一致性）"""
    if not files:
        return None

    files = sort_files_by_path(files)
    manifest_data = []
    latest_mtime = 0

    # 指定固定的压缩级别以确保跨平台一致性
    with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
        for f in files:
            latest_mtime = process_single_file(
                f, zf, manifest_data, latest_mtime, compute_hashes
            )

        add_manifest_to_zip(zf, manifest_data, latest_mtime)

    return output_path


def sort_files_by_path(files):
    """按相对路径排序文件（使用POSIX路径格式确保跨平台一致性）"""
    return sorted(files, key=lambda f: f["rel_path"].as_posix())


def process_single_file(
    file_info, zip_file, manifest_data, latest_mtime, compute_hashes
):
    """处理单个文件并将其添加到ZIP中"""
    try:
        filepath = file_info["path"]
        # 使用 as_posix() 确保路径分隔符统一为正斜杠
        arc_name = file_info["rel_path"].as_posix()
        mtime = file_info["mtime"]

        # 更新最新的修改时间
        if mtime > latest_mtime:
            latest_mtime = mtime

        # 判断是否是可执行文件（基于扩展名，不依赖平台权限）
        is_exec = is_executable_extension(filepath)

        zip_info = create_zip_info(arc_name, mtime, is_executable=is_exec)

        file_data = read_file_data(filepath)
        zip_file.writestr(zip_info, file_data)

        file_hashes = compute_file_hashes_if_needed(filepath, compute_hashes)
        add_to_manifest(manifest_data, arc_name, file_info, file_hashes)

        return latest_mtime
    except Exception as e:
        print(f"ERROR: failed to add {file_info['path']}: {e}", file=sys.stderr)
        return latest_mtime


def create_zip_info(arc_name, mtime, is_executable=False):
    """创建ZipInfo对象并设置跨平台一致的属性"""
    info = zipfile.ZipInfo(arc_name)
    info.date_time = mtime_to_ziptime(mtime)
    info.compress_type = zipfile.ZIP_DEFLATED

    # 强制使用 Unix 作为创建系统标识符，确保跨平台一致性
    # 0 = MS-DOS, 3 = Unix
    info.create_system = 3

    # 使用固定的Unix权限值，不依赖平台的实际文件权限
    # 0o100000 = S_IFREG (普通文件标志)
    # 0o755 = rwxr-xr-x (可执行) 0o644 = rw-r--r-- (普通文件)
    if is_executable:
        mode = 0o100755
    else:
        mode = 0o100644
    info.external_attr = mode << 16

    return info


def read_file_data(filepath):
    """读取文件内容"""
    with open(filepath, "rb") as fp:
        return fp.read()


def compute_file_hashes_if_needed(filepath, compute_hashes):
    """计算文件哈希（如果需要）"""
    if compute_hashes:
        blake3 = compute_hash(filepath, "blake3")
        sha256 = compute_hash(filepath, "sha256")
        return blake3, sha256
    return None, None


def add_to_manifest(manifest_data, arc_name, file_info, file_hashes):
    """将文件信息添加到清单"""
    blake3, sha256 = file_hashes
    manifest_data.append(
        {
            "path": arc_name,  # arc_name 已经是 POSIX 格式
            "size": format_size(file_info["size"]),
            "blake3": blake3 if blake3 else "error",
            "sha256": sha256 if sha256 else "error",
        }
    )


def add_manifest_to_zip(zip_file, manifest_data, latest_mtime):
    """将清单添加到ZIP文件"""
    manifest_info = zipfile.ZipInfo("_MANIFEST.yaml")

    if latest_mtime > 0:
        manifest_info.date_time = mtime_to_ziptime(latest_mtime)
    else:
        # 固定时间戳作为后备
        manifest_info.date_time = (2000, 1, 1, 0, 0, 0)

    manifest_info.compress_type = zipfile.ZIP_DEFLATED

    # 设置跨平台一致的属性
    manifest_info.create_system = 3  # Unix
    manifest_info.external_attr = 0o100644 << 16  # 普通文件权限

    # 将数据转换为YAML格式
    manifest_yaml = yaml.dump(
        {"files": manifest_data},
        allow_unicode=True,
        sort_keys=False,
        default_flow_style=False,
    )

    # 确保使用 Unix 换行符 (LF) 而不是 Windows 换行符 (CRLF)
    manifest_yaml = manifest_yaml.replace("\r\n", "\n").replace("\r", "\n")

    # 使用 UTF-8 编码写入
    zip_file.writestr(manifest_info, manifest_yaml.encode("utf-8"))


def print_results(files, stats, output_path=None):
    """打印结果"""
    print("\n--- Scan Results ---")
    print(f"Directories scanned: {stats['dirs']}")
    print(f"Files scanned:       {stats['files']}")
    print(f"Files matched:       {stats['matched']}")

    if stats["skipped_size"]:
        print(f"Skipped (too large): {stats['skipped_size']}")
    if stats["skipped_err"]:
        print(f"Skipped (errors):    {stats['skipped_err']}")

    if files:
        total_size = sum(f["size"] for f in files)
        print(f"Total size:          {format_size(total_size)}")

        # 按扩展名统计
        ext_count = {}
        for f in files:
            ext = f["ext"] or "(none)"
            ext_count[ext] = ext_count.get(ext, 0) + 1

        print("\nBy extension:")
        for ext, count in sorted(ext_count.items(), key=lambda x: -x[1]):
            print(f"  {ext}: {count}")

    if output_path and output_path.exists():
        print("\n--- Output File ---")
        print(f"Path:   {output_path}")
        print(f"Size:   {format_size(output_path.stat().st_size)}")

        blake3, sha256 = compute_file_hashes(output_path)
        if blake3:
            print(f"BLAKE3: {blake3}")
        if sha256:
            print(f"SHA256: {sha256}")


def list_files(files):
    """列出找到的文件"""
    if not files:
        print("No files found.")
        return

    print(f"\n{'Path':<60} {'Size':>10} {'Ext':>8}")
    print("-" * 80)
    for f in sorted(files, key=lambda x: x["rel_path"].as_posix()):
        print(f"{str(f['rel_path']):<60} {format_size(f['size']):>10} {f['ext']:>8}")


def validate_file_entry(file_entry):
    """验证文件条目是否包含必需字段"""
    required_fields = ["path", "size", "blake3"]
    if not isinstance(file_entry, dict):
        return False
    return all(field in file_entry for field in required_fields)


def parse_manifest_from_zip(zip_path):
    """从 ZIP 包中解析 _MANIFEST.yaml 文件"""
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            with zf.open("_MANIFEST.yaml") as f:
                manifest_yaml = f.read().decode("utf-8")
                manifest_data = yaml.safe_load(manifest_yaml)
                if manifest_data is None:
                    return []

                files = manifest_data.get("files", [])

                # 过滤并警告无效条目
                valid_files = []
                for i, file in enumerate(files):
                    if validate_file_entry(file):
                        valid_files.append(file)
                    else:
                        print(
                            f"WARNING: Entry {i + 1} in {zip_path} has incomplete fields, skipped: {file}"
                        )

                return valid_files
    except Exception as e:
        print(f"ERROR: Failed to parse _MANIFEST.yaml in {zip_path}: {e}")
        return None


def compare_manifest_data(manifest1, manifest2):
    """比较两个清单中的文件信息"""
    files1 = {file["path"]: file for file in manifest1}
    files2 = {file["path"]: file for file in manifest2}

    diff = {
        "added": [],
        "removed": [],
        "modified": [],
    }

    # 检查新增的文件
    for path, file in files2.items():
        if path not in files1:
            diff["added"].append(file)

    # 检查删除的文件
    for path, file in files1.items():
        if path not in files2:
            diff["removed"].append(file)

    # 检查修改的文件
    for path, file in files1.items():
        if path in files2:
            file2 = files2[path]
            if file["size"] != file2["size"] or file["blake3"] != file2["blake3"]:
                diff["modified"].append(
                    {
                        "path": path,
                        "old": file,
                        "new": file2,
                    }
                )

    return diff


def print_manifest_diff(diff):
    """输出清单比较结果"""
    if not diff["added"] and not diff["removed"] and not diff["modified"]:
        print("The two manifests are identical.")
        return

    if diff["added"]:
        print("\nAdded files:")
        for file in diff["added"]:
            print(f"  {file['path']} (Size: {file['size']}, BLAKE3: {file['blake3']})")

    if diff["removed"]:
        print("\nRemoved files:")
        for file in diff["removed"]:
            print(f"  {file['path']} (Size: {file['size']}, BLAKE3: {file['blake3']})")

    if diff["modified"]:
        print("\nModified files:")
        for item in diff["modified"]:
            old_file = item["old"]
            new_file = item["new"]
            print(f"  {item['path']}")
            print(f"    Size: {old_file['size']} -> {new_file['size']}")
            print(f"    BLAKE3: {old_file['blake3']} -> {new_file['blake3']}")


def compare_manifests(zip1_path, zip2_path):
    """比较两个 ZIP 包中的 _MANIFEST.yaml 文件"""
    manifest1 = parse_manifest_from_zip(zip1_path)
    manifest2 = parse_manifest_from_zip(zip2_path)

    if manifest1 is None or manifest2 is None:
        print("ERROR: Failed to parse manifest files.")
        return

    diff = compare_manifest_data(manifest1, manifest2)
    print_manifest_diff(diff)


def compare_zip_hashes(zip1_path, zip2_path):
    """比较两个 ZIP 包的 BLAKE3 和 SHA256 哈希值"""

    def compute_zip_hash(zip_path):
        try:
            with open(zip_path, "rb") as f:
                data = f.read()
                blake3_hash = blake3(data).hexdigest()
                sha256_hash = hashlib.sha256(data).hexdigest()
                return blake3_hash, sha256_hash
        except Exception as e:
            print(f"ERROR: Failed to compute hash for {zip_path}: {e}")
            return None, None

    blake3_1, sha256_1 = compute_zip_hash(zip1_path)
    blake3_2, sha256_2 = compute_zip_hash(zip2_path)

    if blake3_1 is None or sha256_1 is None or blake3_2 is None or sha256_2 is None:
        print("ERROR: Failed to compute ZIP hashes.")
        return

    print("\n--- ZIP Hash Comparison ---")
    print(f"ZIP1 BLAKE3: {blake3_1}")
    print(f"ZIP2 BLAKE3: {blake3_2}")
    print(f"ZIP1 SHA256: {sha256_1}")
    print(f"ZIP2 SHA256: {sha256_2}")

    if blake3_1 == blake3_2 and sha256_1 == sha256_2:
        print("The two ZIP files have identical hashes.")
    else:
        print("The two ZIP files have different hashes.")


def main():
    parser = argparse.ArgumentParser(
        description="Extract executables/scripts/DLLs for virus scanning.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s /path/to/scan
  %(prog)s /path/to/scan -o scan_result.zip
  %(prog)s . --categories windows scripts_win
  %(prog)s . --max-size 50 --no-recursive
  %(prog)s . --exclude-dir node_modules --exclude-dir .git
  %(prog)s . --list-only
  %(prog)s . --check-executable

Categories:
  windows         Windows executables (.exe .dll .sys .ocx ...)
  scripts_win     Windows scripts (.bat .cmd .ps1 .vbs .hta ...)
  linux           Linux/Unix executables (.sh .so .ko .bin ...)
  macos           macOS files (.dylib .pkg .dmg .command ...)
  cross_platform  Cross-platform scripts (.py .pl .rb .jar ...)
  office_macro    Office macro files (.docm .xlsm .pptm ...)
  other           Other risky files (.lnk .reg .chm .iso ...)

Recommended online scanners:
  VirusTotal:       网页链接(www.virustotal.com)
  Hybrid Analysis:  https://www.hybrid-analysis.com/
  MetaDefender:     https://metadefender.opswat.com/
""",
    )

    parser.add_argument("directory", nargs="?", help="directory to scan", default=None)
    parser.add_argument("-o", "--output", help="output ZIP file path")
    parser.add_argument(
        "-c",
        "--categories",
        nargs="+",
        choices=list(EXTENSIONS.keys()),
        metavar="CAT",
        help="file categories to include (default: all)",
    )
    parser.add_argument(
        "-m",
        "--max-size",
        type=int,
        default=100,
        help="max file size in MB (default: 100)",
    )
    parser.add_argument(
        "-e",
        "--exclude-dir",
        action="append",
        dest="exclude_dirs",
        metavar="DIR",
        help="directory name to exclude (can be used multiple times)",
    )
    parser.add_argument(
        "-d",
        "--diff",
        nargs=2,
        metavar=("ZIP1", "ZIP2"),
        help="compare manifests and ZIP hashes of two ZIP files",
    )
    parser.add_argument(
        "--no-recursive", action="store_true", help="do not scan subdirectories"
    )
    parser.add_argument(
        "--no-hash",
        action="store_true",
        help="do not compute BLAKE3 hashes for files in manifest",
    )
    parser.add_argument(
        "--list-only", action="store_true", help="list files only, do not create ZIP"
    )
    parser.add_argument(
        "--check-executable",
        action="store_true",
        help="include executable files without extension (Unix)",
    )
    parser.add_argument(
        "--show-extensions",
        action="store_true",
        help="show all supported extensions and exit",
    )

    args = parser.parse_args()

    # 进行Diff比较
    if args.diff:
        zip1_path, zip2_path = args.diff
        compare_manifests(zip1_path, zip2_path)
        compare_zip_hashes(zip1_path, zip2_path)
        return 0
    # 如果没有提供 --diff 参数，则检查 directory 参数
    if not args.directory:
        print("ERROR: directory is required when not using --diff", file=sys.stderr)
        return 1

    # 显示扩展名列表
    if args.show_extensions:
        for cat, exts in sorted(EXTENSIONS.items()):
            print(f"\n[{cat}]")
            print(f"  {' '.join(sorted(exts))}")
        return 0

    # 验证目录
    source_dir = Path(args.directory).resolve()
    if not source_dir.exists():
        print(f"ERROR: directory not found: {source_dir}", file=sys.stderr)
        return 1
    if not source_dir.is_dir():
        print(f"ERROR: not a directory: {source_dir}", file=sys.stderr)
        return 1

    # 获取扩展名
    extensions = get_all_extensions(args.categories)
    max_size = args.max_size * 1024 * 1024 if args.max_size else None
    exclude_dirs = set(args.exclude_dirs) if args.exclude_dirs else set()

    print(f"Scanning: {source_dir}")
    print(f"Categories: {', '.join(args.categories) if args.categories else 'all'}")
    if exclude_dirs:
        print(f"Excluding: {', '.join(sorted(exclude_dirs))}")

    # 扫描
    files, stats = scan_directory(
        source_dir,
        extensions,
        recursive=not args.no_recursive,
        exclude_dirs=exclude_dirs,
        max_size=max_size,
        check_exec=args.check_executable,
    )

    # 输出结果
    if args.list_only:
        list_files(files)
        print_results(files, stats)
        return 0

    # 创建ZIP
    if not files:
        print("No matching files found.")
        return 0

    if args.output:
        output_path = Path(args.output)
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = Path(f"executables_{timestamp}.zip")

    print(f"Creating ZIP: {output_path}")
    create_zip(files, output_path, compute_hashes=not args.no_hash)
    print_results(files, stats, output_path)

    return 0


if __name__ == "__main__":
    sys.exit(main())