浏览器运行环境异常,请检查是否开启本站的JavaScript权限或下载最新版浏览器
Logo
综合
文章
漫画
圈子
帮助
签到
创作
极速下载
资源区
学习区
专栏区
无标题文章
按赞
反对
评论
收藏
分享

可执行文件提取打包py脚本

用于提取游戏目录下的可执行文件,并打包为ZIP文件。

该ZIP包中还有一个文件清单 _MANIFEST.yaml:

files:
- path: Game/Game.exe
  size: 2.0MB
  md5: f760c2f862cf7d98b570bae41364e05d
  sha256: b2fe0f8cd0adb1d221d662e5f33b36b8245413178d8986b0d6dd3c42fc6d78de
- path: Game/MVPluginPatcher.exe
  size: 37.5KB
  md5: e0cefcd0d34a5e59413344104d5d57a4
  sha256: 89edcd5d47333c299c086a5278ed3db20f7b6e471e849f92102b1ef6e42eecf4
- path: Game/d3dcompiler_47.dll
  size: 4.3MB
  md5: 7641e39b7da4077084d2afe7c31032e0
  sha256: 44422e6936dc72b7ac5ed16bb8bcae164b7554513e52efb66a3e942cec328a47
- path: Game/ffmpeg.dll
  size: 1.7MB
  md5: f2db31ad12fd16c5359ac583181fbfdf
  sha256: 44917512b58d193b4361f2d07b723346feb7bc69f2b0ff51002b3c76f88d0f71

你可以将这个ZIP压缩包上传到在线病毒扫描平台,并将扫描结果作为凭证。
只要源文件内容和修改时间不变,生成的 ZIP 文件哈希值就会保持一致。

PS: py .\executable_extractor.py .\花葬巫女サクヤ\ -o test.zip
UNIX: python executable_extractor.py ./花葬巫女サクヤ -o test2.zip

--- Output File ---
Path:   test.zip
Size:   48.3MB
MD5:    ec02318ec0185b1c52e5263f9a1eeeea
SHA256: e32c426bb04994a65343b537dfb5788342f5d0eff22a8ca182ab6c7f4b480cda

--- Output File ---
Path:   test2.zip
Size:   48.3MB
MD5:    ec02318ec0185b1c52e5263f9a1eeeea
SHA256: e32c426bb04994a65343b537dfb5788342f5d0eff22a8ca182ab6c7f4b480cda

---

更新内容:
- [x] 实现基础功能
- [x] 解决轻微瑕疵(Lint语法警告,函数复杂度大于15)
- [x] 解决跨平台哈希计算一致性问题 <-
- [ ] 发现新问题

提示:脚本依赖第三方 yaml 模块(PyYAML),运行脚本之前需要自行安装,例如:pip install pyyaml

脚本源码(-h查看帮助):网页链接(vikingfile.com)

#!/usr/bin/env python3

import os
import sys
import time
import zipfile
import argparse
import hashlib
from pathlib import Path
from datetime import datetime
import yaml

# Executable / potentially dangerous file extensions, grouped by category.
# Keys are the category names accepted by the --categories option; values are
# sets of lowercase extensions including the leading dot.
EXTENSIONS = {
    # Windows executables and libraries
    'windows': {
        '.exe', '.dll', '.sys', '.drv', '.ocx', '.cpl', '.scr',
        '.com', '.pif', '.msi', '.msix', '.msp', '.mst', '.gadget',
        '.ax', '.acm', '.efi', '.mui', '.tsp', '.fon',
    },
    
    # Windows scripts
    'scripts_win': {
        '.bat', '.cmd', '.ps1', '.psm1', '.psd1',
        '.vbs', '.vbe', '.js', '.jse', '.wsf', '.wsh', '.wsc', '.hta',
    },
    
    # Unix/Linux executables and libraries
    'linux': {
        '.sh', '.bash', '.zsh', '.ksh', '.csh', '.fish',
        '.so', '.ko', '.bin', '.run', '.out', '.elf', '.appimage',
    },
    
    # macOS
    'macos': {
        '.dylib', '.bundle', '.kext', '.plugin',
        '.pkg', '.mpkg', '.dmg', '.command', '.tool',
        '.action', '.workflow', '.scpt', '.scptd',
    },
    
    # Cross-platform scripts and bytecode
    'cross_platform': {
        '.py', '.pyc', '.pyw', '.pyz', '.pyzw',
        '.pl', '.pm', '.ph', '.plx',
        '.rb', '.rbw',
        '.php', '.php3', '.php4', '.php5', '.phtml',
        '.lua', '.tcl', '.awk', '.sed',
        '.jar', '.class', '.war', '.ear',
    },
    
    # Office macro-enabled files (can carry malicious code)
    'office_macro': {
        '.docm', '.dotm', '.xlsm', '.xltm', '.xlam', '.xll',
        '.pptm', '.potm', '.ppam', '.ppsm', '.sldm',
        '.accde', '.accdr', '.accda', '.mde', '.ade', '.adp',
    },
    
    # Other potentially dangerous files
    'other': {
        '.lnk', '.url', '.reg', '.inf',
        '.chm', '.hlp', '.msc', '.crt', '.cer',
        '.application', '.appref-ms', '.settingcontent-ms',
        '.vhd', '.vhdx', '.iso', '.img',
    },
}

# Extensions treated as "executable" when choosing the Unix permission bits
# stored for a ZIP entry (see is_executable_extension / create_zip_info).
EXECUTABLE_EXTENSIONS = {
    '.exe', '.dll', '.so', '.dylib', '.sh', '.bash', '.zsh', '.ksh', '.csh',
    '.py', '.pl', '.rb', '.bat', '.cmd', '.ps1', '.bin', '.run', '.out',
    '.elf', '.appimage', '.command', '.tool',
}


def get_all_extensions(categories=None):
    """Return the union of the extension sets of the requested categories.

    Args:
        categories: iterable of category names (keys of ``EXTENSIONS``).
            ``None`` selects every category. Names that are not keys of
            ``EXTENSIONS`` are ignored.

    Returns:
        A new ``set`` of extensions.
    """
    selected = EXTENSIONS.keys() if categories is None else categories
    known_groups = (EXTENSIONS[name] for name in selected if name in EXTENSIONS)
    return set().union(*known_groups)


def format_size(size):
    """Format a byte count as a short string with one decimal, e.g. ``37.5KB``.

    The value is divided by 1024 until it drops below 1024; the units tried
    are B, KB, MB and GB, and anything larger is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    index = 0
    while index < len(units):
        if size < 1024:
            return f"{size:.1f}{units[index]}"
        size /= 1024
        index += 1
    return f"{size:.1f}TB"


def compute_hash(filepath, algorithm='md5', chunk_size=8192):
    """Return the hex digest of a file, or ``None`` if it cannot be read.

    Args:
        filepath: path of the file to hash.
        algorithm: any name accepted by ``hashlib.new``.
        chunk_size: number of bytes read per iteration.
    """
    digest = hashlib.new(algorithm)
    try:
        with open(filepath, 'rb') as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for block in iter(lambda: stream.read(chunk_size), b''):
                digest.update(block)
    except OSError:
        return None
    return digest.hexdigest()


def compute_file_hashes(filepath):
    """Compute the MD5 and SHA256 hex digests of a file in a single pass.

    Returns:
        ``(md5_hex, sha256_hex)``, or ``(None, None)`` if the file cannot be
        opened or read.
    """
    digests = (hashlib.md5(), hashlib.sha256())
    try:
        with open(filepath, 'rb') as stream:
            block = stream.read(8192)
            while block:
                for digest in digests:
                    digest.update(block)
                block = stream.read(8192)
    except OSError:
        return None, None
    return tuple(digest.hexdigest() for digest in digests)


def is_elf_or_script(filepath):
    """Tell whether a file starts with the ELF magic or a ``#!`` shebang.

    Only the first four bytes are read. Files that cannot be opened or read
    are reported as ``False``.
    """
    try:
        with open(filepath, 'rb') as stream:
            magic = stream.read(4)
    except OSError:
        return False
    return magic == b'\x7fELF' or magic.startswith(b'#!')


def is_executable_extension(filepath):
    """Return True if the path's lowercased suffix is in EXECUTABLE_EXTENSIONS.

    ``filepath`` must expose a ``suffix`` attribute (a ``pathlib`` path).
    """
    return filepath.suffix.lower() in EXECUTABLE_EXTENSIONS


def should_include(filepath, extensions, check_executable=False):
    """Decide whether a file belongs in the scan result.

    A file is included when its lowercased suffix is in ``extensions``.
    When ``check_executable`` is set and the platform is not Windows, a file
    with no suffix is also included if it has the execute permission and its
    header identifies it as an ELF binary or a shebang script.
    """
    suffix = filepath.suffix.lower()
    if suffix in extensions:
        return True

    # The permission-based fallback is opt-in and never used on Windows.
    if not check_executable or sys.platform == 'win32':
        return False
    if suffix or not os.access(filepath, os.X_OK):
        return False
    return is_elf_or_script(filepath)


def scan_directory(directory, extensions, recursive=True,
                    exclude_dirs=None, max_size=None, check_exec=False):
    """Walk a directory tree and collect the files that match.

    Args:
        directory: root directory to scan; it is resolved to an absolute path.
        extensions: set of lowercase suffixes (with leading dot) to match.
        recursive: descend into subdirectories when true.
        exclude_dirs: directory *names* that are not descended into.
        max_size: size limit in bytes; larger files are counted as skipped.
        check_exec: forwarded to the file filter to enable the check for
            extension-less executables.

    Returns:
        ``(found, stats)`` where ``found`` is the list of file-info dicts
        appended by ``process_file`` and ``stats`` holds the counters
        ``dirs``, ``files``, ``matched``, ``skipped_size`` and ``skipped_err``.
    """
    directory = Path(directory).resolve()
    exclude_dirs = exclude_dirs or set()
    found = []
    stats = {'dirs': 0, 'files': 0, 'matched': 0, 'skipped_size': 0, 'skipped_err': 0}
    # Real paths of the directories currently on the recursion stack.
    # Path.is_dir() follows symlinks, so without this guard a link pointing
    # at one of its own ancestors would be rescanned over and over.
    active = set()

    def scan(path):
        real = os.path.normcase(os.path.realpath(path))
        if real in active:
            print(f"WARN: skipping symlink cycle: {path}", file=sys.stderr)
            return
        active.add(real)
        try:
            stats['dirs'] += 1
            entries = get_directory_entries(path)
            if entries is None:
                return

            for entry in entries:
                if entry.is_dir():
                    process_directory(entry, recursive, exclude_dirs, scan)
                elif entry.is_file():
                    process_file(entry, extensions, check_exec, max_size,
                                 directory, found, stats)
        finally:
            # Leaving this directory: the same target may legitimately be
            # reached again through a different, non-cyclic path.
            active.discard(real)

    scan(directory)
    return found, stats


def get_directory_entries(path):
    """List the entries of a directory.

    Args:
        path: a ``pathlib.Path`` pointing at the directory.

    Returns:
        A list of child paths, or ``None`` when the directory cannot be read.
        A warning is written to stderr in that case, so one unreadable
        directory does not abort the whole scan.
    """
    try:
        return list(path.iterdir())
    except PermissionError:
        print(f"WARN: permission denied: {path}", file=sys.stderr)
        return None
    except OSError as exc:
        # E.g. the directory vanished mid-scan, is a broken mount point, or
        # the path is too long.
        print(f"WARN: cannot read directory: {path}: {exc}", file=sys.stderr)
        return None


def process_directory(entry, recursive, exclude_dirs, scan_func):
    """Descend into a subdirectory unless recursion is off or it is excluded.

    ``scan_func`` is called with ``entry`` when ``recursive`` is true and
    ``entry.name`` is not listed in ``exclude_dirs``.
    """
    if not recursive:
        return
    if entry.name in exclude_dirs:
        return
    scan_func(entry)


def process_file(entry, extensions, check_exec, max_size, directory, found, stats):
    """Filter one file and record it in ``found`` if it qualifies.

    ``stats['files']`` is always incremented. A matching file whose metadata
    cannot be read bumps ``stats['skipped_err']``; one larger than
    ``max_size`` bumps ``stats['skipped_size']``; otherwise it is added to
    the results.
    """
    stats['files'] += 1

    if should_include(entry, extensions, check_exec):
        info = get_file_info(entry)
        if info is None:
            stats['skipped_err'] += 1
            return

        byte_count, modified = info
        if should_skip_by_size(byte_count, max_size):
            stats['skipped_size'] += 1
        else:
            add_file_to_results(entry, directory, byte_count, modified, found, stats)


def get_file_info(entry):
    """Return ``(size, mtime)`` from ``entry.stat()``, or ``None`` on OSError."""
    try:
        status = entry.stat()
    except OSError:
        return None
    return status.st_size, status.st_mtime


def should_skip_by_size(size, max_size):
    """Return True if ``size`` exceeds the limit ``max_size``.

    A falsy ``max_size`` (``None`` or ``0``) means "no limit". The result is
    always a real ``bool`` rather than whatever falsy value ``max_size`` has.
    """
    return bool(max_size) and size > max_size


def add_file_to_results(entry, directory, size, mtime, found, stats):
    """Append a file-info record for ``entry`` and count it as matched.

    The record holds the path as given, its path relative to ``directory``,
    the size, the modification time and the lowercased suffix.
    """
    record = dict(
        path=entry,
        rel_path=entry.relative_to(directory),
        size=size,
        mtime=mtime,
        ext=entry.suffix.lower(),
    )
    found.append(record)
    stats['matched'] += 1


def mtime_to_ziptime(mtime):
    """Convert a POSIX timestamp to a ZIP ``date_time`` tuple.

    The conversion uses UTC rather than local time, so the bytes written to
    the archive do not depend on the timezone or DST setting of the machine
    that builds it. As a consequence, archive tools that interpret the stored
    time as local time will show it shifted by the UTC offset.

    The ZIP (MS-DOS) timestamp format can only represent the years 1980-2107,
    so values outside that range are clamped instead of making the entry
    fail to be written.

    Args:
        mtime: seconds since the epoch (int or float).

    Returns:
        ``(year, month, day, hour, minute, second)``.
    """
    zip_min = (1980, 1, 1, 0, 0, 0)
    zip_max = (2107, 12, 31, 23, 59, 59)
    try:
        t = time.gmtime(mtime)
    except (OverflowError, OSError, ValueError):
        # The platform C library cannot represent this timestamp at all.
        return zip_min if mtime < 0 else zip_max
    stamp = (t.tm_year, t.tm_mon, t.tm_mday,
             t.tm_hour, t.tm_min, min(t.tm_sec, 59))
    # Lexicographic tuple order equals chronological order for these fields.
    return min(max(stamp, zip_min), zip_max)


def create_zip(files, output_path, compute_hashes=True):
    """Write the given files plus a manifest into a ZIP archive.

    Entries are added in sorted order with a fixed compression level so that
    the same input produces the same archive.

    Args:
        files: list of file-info dicts as produced by the directory scan.
        output_path: where to write the archive.
        compute_hashes: whether per-file hashes go into the manifest.

    Returns:
        ``output_path``, or ``None`` (nothing written) when ``files`` is empty.
    """
    if not files:
        return None

    ordered = sort_files_by_path(files)
    manifest_entries = []
    newest = 0

    # Fixed compression level: keeps the output independent of defaults.
    archive = zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED, compresslevel=6)
    with archive:
        for item in ordered:
            newest = process_single_file(
                item, archive, manifest_entries, newest, compute_hashes
            )
        add_manifest_to_zip(archive, manifest_entries, newest)

    return output_path


def sort_files_by_path(files):
    """Return a new list of file records ordered by relative path.

    The sort key is the POSIX (forward-slash) form of ``rel_path`` so the
    order is the same regardless of the platform's path separator.
    """
    def posix_key(record):
        return record['rel_path'].as_posix()

    ordered = list(files)
    ordered.sort(key=posix_key)
    return ordered


def process_single_file(file_info, zip_file, manifest_data, latest_mtime, compute_hashes):
    """Add one file to the archive and record it in the manifest.

    Args:
        file_info: file record with ``path``, ``rel_path``, ``mtime``, ``size``.
        zip_file: open ``zipfile.ZipFile`` to write into.
        manifest_data: list that receives the manifest entry.
        latest_mtime: newest modification time seen so far.
        compute_hashes: whether to compute hashes for the manifest.

    Returns:
        The updated newest modification time. Failures are reported on stderr
        and do not propagate; the newest-mtime update made before the failure
        is kept.
    """
    try:
        source = file_info['path']
        # Archive member names always use forward slashes.
        member_name = file_info['rel_path'].as_posix()
        modified = file_info['mtime']

        latest_mtime = max(latest_mtime, modified)

        # The executable bit is derived from the extension only, never from
        # the permissions of the platform the script runs on.
        member = create_zip_info(
            member_name, modified, is_executable=is_executable_extension(source)
        )
        zip_file.writestr(member, read_file_data(source))

        hashes = compute_file_hashes_if_needed(source, compute_hashes)
        add_to_manifest(manifest_data, member_name, file_info, hashes)
    except Exception as e:
        print(f"ERROR: failed to add {file_info['path']}: {e}", file=sys.stderr)
    return latest_mtime


def create_zip_info(arc_name, mtime, is_executable=False):
    """Build a ``ZipInfo`` whose metadata does not depend on the host platform.

    Args:
        arc_name: member name inside the archive.
        mtime: modification time used for the entry's timestamp.
        is_executable: store mode 0o755 instead of 0o644.
    """
    entry = zipfile.ZipInfo(arc_name)
    entry.date_time = mtime_to_ziptime(mtime)
    entry.compress_type = zipfile.ZIP_DEFLATED

    # Always claim Unix (3) as the creating system; 0 would be MS-DOS.
    entry.create_system = 3

    # Fixed Unix mode: S_IFREG (0o100000) plus rwxr-xr-x or rw-r--r--,
    # stored in the high 16 bits of external_attr.
    permissions = 0o755 if is_executable else 0o644
    entry.external_attr = (0o100000 | permissions) << 16

    return entry


def read_file_data(filepath):
    """Return the entire content of ``filepath`` as bytes."""
    with open(filepath, 'rb') as stream:
        payload = stream.read()
    return payload


def compute_file_hashes_if_needed(filepath, compute_hashes):
    """Return ``(md5, sha256)`` for a file, or ``(None, None)`` if disabled.

    Both digests come from a single pass over the file via
    ``compute_file_hashes`` instead of reading the file once per algorithm.
    If the file cannot be read, the result is ``(None, None)`` as well.
    """
    if not compute_hashes:
        return None, None
    return compute_file_hashes(filepath)


def add_to_manifest(manifest_data, arc_name, file_info, file_hashes):
    """Append a manifest entry for one archived file.

    Args:
        manifest_data: list receiving the entry.
        arc_name: archive member name (already in POSIX form).
        file_info: file record; its ``size`` is stored in formatted form.
        file_hashes: ``(md5, sha256)``; a falsy digest is stored as ``'error'``.
    """
    md5_hex, sha256_hex = file_hashes
    entry = {
        'path': arc_name,
        'size': format_size(file_info['size']),
        'md5': md5_hex or 'error',
        'sha256': sha256_hex or 'error',
    }
    manifest_data.append(entry)


def add_manifest_to_zip(zip_file, manifest_data, latest_mtime):
    """Write ``_MANIFEST.yaml`` into the archive.

    The manifest entry is stamped with the newest file modification time (or
    a fixed date when none is available) and fixed Unix attributes.
    """
    entry = zipfile.ZipInfo('_MANIFEST.yaml')

    # Use the newest source mtime; fall back to a constant timestamp.
    if latest_mtime > 0:
        entry.date_time = mtime_to_ziptime(latest_mtime)
    else:
        entry.date_time = (2000, 1, 1, 0, 0, 0)

    entry.compress_type = zipfile.ZIP_DEFLATED
    entry.create_system = 3  # Unix
    entry.external_attr = 0o100644 << 16  # regular file, rw-r--r--

    document = {'files': manifest_data}
    text = yaml.dump(
        document,
        allow_unicode=True,
        sort_keys=False,
        default_flow_style=False,
    )

    # Normalise every line ending to LF before encoding as UTF-8.
    text = text.replace('\r\n', '\n')
    text = text.replace('\r', '\n')

    zip_file.writestr(entry, text.encode('utf-8'))


def print_results(files, stats, output_path=None):
    """Print the scan summary and, if present, details of the output file.

    Args:
        files: matched file records.
        stats: counter dict produced by the directory scan.
        output_path: path of the created archive; its size and hashes are
            printed when it exists.
    """
    print("\n--- Scan Results ---")
    print(f"Directories scanned: {stats['dirs']}")
    print(f"Files scanned:       {stats['files']}")
    print(f"Files matched:       {stats['matched']}")

    too_large = stats['skipped_size']
    if too_large:
        print(f"Skipped (too large): {too_large}")
    failed = stats['skipped_err']
    if failed:
        print(f"Skipped (errors):    {failed}")

    if files:
        total_bytes = sum(item['size'] for item in files)
        print(f"Total size:          {format_size(total_bytes)}")

        # Count files per extension, most frequent first.
        per_ext = {}
        for item in files:
            label = item['ext'] or '(none)'
            per_ext[label] = per_ext.get(label, 0) + 1

        print("\nBy extension:")
        ranked = sorted(per_ext.items(), key=lambda pair: pair[1], reverse=True)
        for label, count in ranked:
            print(f"  {label}: {count}")

    if not output_path or not output_path.exists():
        return

    print("\n--- Output File ---")
    print(f"Path:   {output_path}")
    print(f"Size:   {format_size(output_path.stat().st_size)}")

    md5_hex, sha256_hex = compute_file_hashes(output_path)
    if md5_hex:
        print(f"MD5:    {md5_hex}")
    if sha256_hex:
        print(f"SHA256: {sha256_hex}")


def list_files(files):
    """Print a table of the matched files ordered by relative path."""
    if not files:
        print("No files found.")
        return

    header = f"\n{'Path':<60} {'Size':>10} {'Ext':>8}"
    print(header)
    print("-" * 80)

    ordered = sorted(files, key=lambda item: item['rel_path'].as_posix())
    for item in ordered:
        shown_path = str(item['rel_path'])
        shown_size = format_size(item['size'])
        print(f"{shown_path:<60} {shown_size:>10} {item['ext']:>8}")


def main():
    """Command-line entry point.

    Parses the arguments, scans the requested directory and either lists the
    matching files (``--list-only``) or packs them into a ZIP archive.

    Returns:
        Process exit code: 0 on success (including "nothing matched"), 1 when
        the directory is invalid or the archive cannot be created.
    """
    parser = argparse.ArgumentParser(
        description='Extract executables/scripts/DLLs for virus scanning.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s /path/to/scan
  %(prog)s /path/to/scan -o scan_result.zip
  %(prog)s . --categories windows scripts_win
  %(prog)s . --max-size 50 --no-recursive
  %(prog)s . --exclude-dir node_modules --exclude-dir .git
  %(prog)s . --list-only
  %(prog)s . --check-executable

Categories:
  windows         Windows executables (.exe .dll .sys .ocx ...)
  scripts_win     Windows scripts (.bat .cmd .ps1 .vbs .hta ...)
  linux           Linux/Unix executables (.sh .so .ko .bin ...)
  macos           macOS files (.dylib .pkg .dmg .command ...)
  cross_platform  Cross-platform scripts (.py .pl .rb .jar ...)
  office_macro    Office macro files (.docm .xlsm .pptm ...)
  other           Other risky files (.lnk .reg .chm .iso ...)

Recommended online scanners:
  VirusTotal:       https://www.virustotal.com/
  Hybrid Analysis:  https://www.hybrid-analysis.com/
  MetaDefender:     https://metadefender.opswat.com/
"""
    )

    parser.add_argument('directory', help='directory to scan')
    parser.add_argument('-o', '--output', help='output ZIP file path')
    parser.add_argument('-c', '--categories', nargs='+',
                        choices=list(EXTENSIONS.keys()),
                        metavar='CAT',
                        help='file categories to include (default: all)')
    parser.add_argument('-m', '--max-size', type=int, default=100,
                        help='max file size in MB (default: 100)')
    parser.add_argument('-e', '--exclude-dir', action='append', dest='exclude_dirs',
                        metavar='DIR',
                        help='directory name to exclude (can be used multiple times)')
    parser.add_argument('--no-recursive', action='store_true',
                        help='do not scan subdirectories')
    parser.add_argument('--no-hash', action='store_true',
                        help='do not compute MD5/SHA256 hashes for files in manifest')
    parser.add_argument('--list-only', action='store_true',
                        help='list files only, do not create ZIP')
    parser.add_argument('--check-executable', action='store_true',
                        help='include executable files without extension (Unix)')
    parser.add_argument('--show-extensions', action='store_true',
                        help='show all supported extensions and exit')

    args = parser.parse_args()

    # Print the extension table and stop.
    if args.show_extensions:
        for cat, exts in sorted(EXTENSIONS.items()):
            print(f"\n[{cat}]")
            print(f"  {' '.join(sorted(exts))}")
        return 0

    # Validate the directory to scan.
    source_dir = Path(args.directory).resolve()
    if not source_dir.exists():
        print(f"ERROR: directory not found: {source_dir}", file=sys.stderr)
        return 1
    if not source_dir.is_dir():
        print(f"ERROR: not a directory: {source_dir}", file=sys.stderr)
        return 1

    # Resolve the scan options; a max size of 0 means "no limit".
    extensions = get_all_extensions(args.categories)
    max_size = args.max_size * 1024 * 1024 if args.max_size else None
    exclude_dirs = set(args.exclude_dirs) if args.exclude_dirs else set()

    print(f"Scanning: {source_dir}")
    print(f"Categories: {', '.join(args.categories) if args.categories else 'all'}")
    if exclude_dirs:
        print(f"Excluding: {', '.join(sorted(exclude_dirs))}")

    # Scan.
    files, stats = scan_directory(
        source_dir,
        extensions,
        recursive=not args.no_recursive,
        exclude_dirs=exclude_dirs,
        max_size=max_size,
        check_exec=args.check_executable,
    )

    # List-only mode: report and stop without creating an archive.
    if args.list_only:
        list_files(files)
        print_results(files, stats)
        return 0

    # Create the ZIP.
    if not files:
        print("No matching files found.")
        return 0

    if args.output:
        output_path = Path(args.output)
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = Path(f"executables_{timestamp}.zip")

    print(f"Creating ZIP: {output_path}")
    try:
        create_zip(files, output_path, compute_hashes=not args.no_hash)
    except OSError as exc:
        # E.g. the output directory does not exist or is not writable.
        print(f"ERROR: failed to create ZIP {output_path}: {exc}", file=sys.stderr)
        return 1
    print_results(files, stats, output_path)

    return 0


# Run main() only when executed as a script and use its return value as the
# process exit code.
if __name__ == '__main__':
    sys.exit(main())
评论区

这个up主感受到了孤独

logo
有维咔App就够了
随时随地发现资源,免去网页端烦恼广告
打开App