按赞
反对
评论
收藏
分享

可执行文件提取打包py脚本
用于提取游戏目录下的可执行文件,并打包为ZIP文件。
该ZIP包中还有一个文件清单 _MANIFEST.yaml:
files:
- path: Game/Game.exe
  size: 2.0MB
  md5: f760c2f862cf7d98b570bae41364e05d
  sha256: b2fe0f8cd0adb1d221d662e5f33b36b8245413178d8986b0d6dd3c42fc6d78de
- path: Game/MVPluginPatcher.exe
  size: 37.5KB
  md5: e0cefcd0d34a5e59413344104d5d57a4
  sha256: 89edcd5d47333c299c086a5278ed3db20f7b6e471e849f92102b1ef6e42eecf4
- path: Game/d3dcompiler_47.dll
  size: 4.3MB
  md5: 7641e39b7da4077084d2afe7c31032e0
  sha256: 44422e6936dc72b7ac5ed16bb8bcae164b7554513e52efb66a3e942cec328a47
- path: Game/ffmpeg.dll
  size: 1.7MB
  md5: f2db31ad12fd16c5359ac583181fbfdf
  sha256: 44917512b58d193b4361f2d07b723346feb7bc69f2b0ff51002b3c76f88d0f71
你可以将这个ZIP压缩包上传到病毒扫描中心。将扫描结果作为凭证。
只要源文件内容和修改时间不变,生成的 ZIP 文件哈希值就会保持一致。
PS: py .\executable_extractor.py .\花葬巫女サクヤ\ -o test.zip
UNIX: python executable_extractor.py ./花葬巫女サクヤ -o test2.zip
--- Output File ---
Path: test.zip
Size: 48.3MB
MD5: ec02318ec0185b1c52e5263f9a1eeeea
SHA256: e32c426bb04994a65343b537dfb5788342f5d0eff22a8ca182ab6c7f4b480cda
--- Output File ---
Path: test2.zip
Size: 48.3MB
MD5: ec02318ec0185b1c52e5263f9a1eeeea
SHA256: e32c426bb04994a65343b537dfb5788342f5d0eff22a8ca182ab6c7f4b480cda
---
更新内容:
- [x] 实现基础功能
- [x] 解决轻微瑕疵(Lint语法警告,函数复杂度大于15)
- [x] 解决跨平台哈希计算一致性问题 <-
- [ ] 发现新问题
提示:脚本导入了 yaml 模块(由第三方包 PyYAML 提供),运行脚本之前需要自行安装,例如:pip install pyyaml
脚本源码(-h查看帮助):网页链接(vikingfile.com)
#!/usr/bin/env python3
import os
import sys
import time
import zipfile
import argparse
import hashlib
from pathlib import Path
from datetime import datetime
import yaml
# File extensions of executable / potentially dangerous files, grouped by
# category. The category names are the accepted values of the --categories
# command-line option.
EXTENSIONS = {
    # Windows executables and libraries
    'windows': {
        '.exe', '.dll', '.sys', '.drv', '.ocx', '.cpl', '.scr',
        '.com', '.pif', '.msi', '.msix', '.msp', '.mst', '.gadget',
        '.ax', '.acm', '.efi', '.mui', '.tsp', '.fon',
    },
    # Windows scripts
    'scripts_win': {
        '.bat', '.cmd', '.ps1', '.psm1', '.psd1',
        '.vbs', '.vbe', '.js', '.jse', '.wsf', '.wsh', '.wsc', '.hta',
    },
    # Unix/Linux executables and libraries
    'linux': {
        '.sh', '.bash', '.zsh', '.ksh', '.csh', '.fish',
        '.so', '.ko', '.bin', '.run', '.out', '.elf', '.appimage',
    },
    # macOS
    'macos': {
        '.dylib', '.bundle', '.kext', '.plugin',
        '.pkg', '.mpkg', '.dmg', '.command', '.tool',
        '.action', '.workflow', '.scpt', '.scptd',
    },
    # Cross-platform scripts and bytecode
    'cross_platform': {
        '.py', '.pyc', '.pyw', '.pyz', '.pyzw',
        '.pl', '.pm', '.ph', '.plx',
        '.rb', '.rbw',
        '.php', '.php3', '.php4', '.php5', '.phtml',
        '.lua', '.tcl', '.awk', '.sed',
        '.jar', '.class', '.war', '.ear',
    },
    # Office macro-enabled files (can carry malicious code)
    'office_macro': {
        '.docm', '.dotm', '.xlsm', '.xltm', '.xlam', '.xll',
        '.pptm', '.potm', '.ppam', '.ppsm', '.sldm',
        '.accde', '.accdr', '.accda', '.mde', '.ade', '.adp',
    },
    # Other potentially dangerous files
    'other': {
        '.lnk', '.url', '.reg', '.inf',
        '.chm', '.hlp', '.msc', '.crt', '.cer',
        '.application', '.appref-ms', '.settingcontent-ms',
        '.vhd', '.vhdx', '.iso', '.img',
    },
}
# Extensions treated as executable when choosing the permission bits stored
# for an archive entry (looked up by is_executable_extension).
EXECUTABLE_EXTENSIONS = {
    '.exe', '.dll', '.so', '.dylib', '.sh', '.bash', '.zsh', '.ksh', '.csh',
    '.py', '.pl', '.rb', '.bat', '.cmd', '.ps1', '.bin', '.run', '.out',
    '.elf', '.appimage', '.command', '.tool',
}
def get_all_extensions(categories=None):
    """Return the union of the extension sets of the requested categories.

    Args:
        categories: Iterable of keys of ``EXTENSIONS``; ``None`` selects every
            category. Names that are not keys of ``EXTENSIONS`` are ignored.

    Returns:
        A new ``set`` of lowercase extensions (each with a leading dot).
    """
    selected = EXTENSIONS.keys() if categories is None else categories
    return set().union(
        *(EXTENSIONS[name] for name in selected if name in EXTENSIONS)
    )
def format_size(size):
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 and is printed
    with one decimal place and a B/KB/MB/GB suffix; anything that is still
    1024 GB or more is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = size
    position = 0
    while position < len(units):
        if value < 1024:
            return f"{value:.1f}{units[position]}"
        value = value / 1024
        position += 1
    return f"{value:.1f}TB"
def compute_hash(filepath, algorithm='md5', chunk_size=8192):
    """Return the hex digest of a file, or ``None`` if it cannot be read.

    Args:
        filepath: Path of the file to hash.
        algorithm: Any algorithm name accepted by ``hashlib.new``. The hash
            object is created before the file is opened, so an unsupported
            name raises from ``hashlib.new`` instead of returning ``None``.
        chunk_size: Number of bytes requested per read.
    """
    digest = hashlib.new(algorithm)
    try:
        with open(filepath, 'rb') as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for block in iter(lambda: stream.read(chunk_size), b''):
                digest.update(block)
    except OSError:
        return None
    return digest.hexdigest()
def compute_file_hashes(filepath):
    """Compute the MD5 and SHA-256 digests of a file in a single read pass.

    Returns:
        ``(md5_hex, sha256_hex)``, or ``(None, None)`` when the file cannot
        be opened or read.
    """
    digests = (hashlib.md5(), hashlib.sha256())
    try:
        with open(filepath, 'rb') as stream:
            for block in iter(lambda: stream.read(8192), b''):
                for digest in digests:
                    digest.update(block)
    except OSError:
        return None, None
    md5_digest, sha256_digest = digests
    return md5_digest.hexdigest(), sha256_digest.hexdigest()
def is_elf_or_script(filepath):
    """Tell whether a file starts with the ELF magic number or a ``#!`` line.

    Only the first four bytes are read. A file that cannot be opened or read
    yields ``False``.
    """
    try:
        with open(filepath, 'rb') as stream:
            magic = stream.read(4)
    except OSError:
        return False
    return magic == b'\x7fELF' or magic.startswith(b'#!')
def is_executable_extension(filepath):
    """Return True when the path's suffix, lowercased, is in EXECUTABLE_EXTENSIONS.

    ``filepath`` must expose a ``suffix`` attribute (e.g. ``pathlib.Path``).
    """
    return filepath.suffix.lower() in EXECUTABLE_EXTENSIONS
def should_include(filepath, extensions, check_executable=False):
    """Decide whether a file belongs in the scan result.

    A file matches when its lowercased suffix is in ``extensions``. With
    ``check_executable`` set, on non-Windows platforms a suffix-less file that
    has the execute permission also matches if its header identifies it as an
    ELF binary or a ``#!`` script.
    """
    suffix = filepath.suffix.lower()
    if suffix in extensions:
        return True
    # Header sniffing is reserved for extension-less files carrying the
    # execute bit, and is never attempted on Windows.
    needs_header_probe = (
        check_executable
        and sys.platform != 'win32'
        and not suffix
        and os.access(filepath, os.X_OK)
    )
    if needs_header_probe:
        return is_elf_or_script(filepath)
    return False
def scan_directory(directory, extensions, recursive=True,
                   exclude_dirs=None, max_size=None, check_exec=False):
    """Walk a directory tree and collect the files that match.

    Args:
        directory: Root directory to scan; it is resolved to an absolute path.
        extensions: Set of lowercase suffixes to match.
        recursive: Descend into subdirectories when true.
        exclude_dirs: Directory names that are never descended into.
        max_size: Size limit in bytes; falsy disables the limit.
        check_exec: Forwarded to ``should_include`` (extension-less
            executables on Unix).

    Returns:
        ``(found, stats)`` where ``found`` is a list of file-info dicts and
        ``stats`` counts scanned directories/files, matches and skips.
    """
    directory = Path(directory).resolve()
    exclude_dirs = exclude_dirs or set()
    found = []
    stats = {'dirs': 0, 'files': 0, 'matched': 0, 'skipped_size': 0, 'skipped_err': 0}
    # Resolved paths of the directories on the current descent chain. Because
    # is_dir() follows symlinks, a link that points back to one of its own
    # ancestors would otherwise recurse without bound.
    active = set()

    def scan(path):
        try:
            real = path.resolve()
        except (OSError, RuntimeError):
            # Resolution failed; fall back to the literal path.
            real = path
        if real in active:
            print(f"WARN: skipping symlink cycle: {path}", file=sys.stderr)
            return
        active.add(real)
        try:
            stats['dirs'] += 1
            entries = get_directory_entries(path)
            if entries is None:
                return
            for entry in entries:
                if entry.is_dir():
                    process_directory(entry, recursive, exclude_dirs, scan)
                elif entry.is_file():
                    process_file(entry, extensions, check_exec, max_size,
                                 directory, found, stats)
        finally:
            # Only the active chain is tracked, so a directory reachable via
            # several non-cyclic paths is still scanned under each of them.
            active.discard(real)

    scan(directory)
    return found, stats
def get_directory_entries(path):
    """List the entries of a directory.

    Returns:
        A list of child paths, or ``None`` (after printing a warning to
        stderr) when the directory cannot be read, so that one unreadable or
        vanished directory does not abort the whole scan.
    """
    try:
        return list(path.iterdir())
    except PermissionError:
        print(f"WARN: permission denied: {path}", file=sys.stderr)
    except OSError as exc:
        # e.g. the directory was removed mid-scan, or an I/O error occurred.
        print(f"WARN: cannot read directory: {path}: {exc}", file=sys.stderr)
    return None
def process_directory(entry, recursive, exclude_dirs, scan_func):
    """Descend into a subdirectory unless recursion is off or it is excluded.

    ``scan_func`` is called with ``entry`` when ``recursive`` is true and the
    directory's name is not in ``exclude_dirs``.
    """
    if not recursive:
        return
    if entry.name in exclude_dirs:
        return
    scan_func(entry)
def process_file(entry, extensions, check_exec, max_size, directory, found, stats):
    """Count one file and record it in ``found`` if it qualifies.

    ``stats['files']`` is always incremented. A matching file whose metadata
    cannot be read bumps ``stats['skipped_err']``; one above ``max_size``
    bumps ``stats['skipped_size']``; any other match is appended to ``found``.
    """
    stats['files'] += 1
    if not should_include(entry, extensions, check_exec):
        return
    if (info := get_file_info(entry)) is None:
        stats['skipped_err'] += 1
        return
    size, mtime = info
    if should_skip_by_size(size, max_size):
        stats['skipped_size'] += 1
    else:
        add_file_to_results(entry, directory, size, mtime, found, stats)
def get_file_info(entry):
    """Return ``(size_in_bytes, mtime)`` for a path, or ``None`` if stat fails."""
    try:
        details = entry.stat()
    except OSError:
        return None
    return details.st_size, details.st_mtime
def should_skip_by_size(size, max_size):
    """Return True when a size limit is set and ``size`` exceeds it.

    A falsy ``max_size`` (``None`` or ``0``) means "no limit". The result is
    always a real ``bool`` rather than whatever falsy value ``max_size`` holds.
    """
    return bool(max_size) and size > max_size
def add_file_to_results(entry, directory, size, mtime, found, stats):
    """Append a file-info record for ``entry`` to ``found`` and count the match.

    The record holds the full path, the path relative to ``directory``, the
    size, the modification time and the lowercased suffix.
    """
    record = {
        'path': entry,
        'rel_path': entry.relative_to(directory),
        'size': size,
        'mtime': mtime,
        'ext': entry.suffix.lower(),
    }
    found.append(record)
    stats['matched'] += 1
def mtime_to_ziptime(mtime):
    """Convert a POSIX timestamp to a ZIP ``date_time`` tuple in UTC.

    UTC is used instead of local time so that the same file produces the same
    archive bytes regardless of the time zone of the machine running the
    script. The result is clamped to the range a ZIP entry can store
    (1980-01-01 .. 2107-12-31); timestamps outside it would otherwise make
    ``zipfile`` raise when the entry is written.

    Returns:
        ``(year, month, day, hour, minute, second)``.
    """
    earliest = (1980, 1, 1, 0, 0, 0)
    latest = (2107, 12, 31, 23, 59, 58)
    try:
        t = time.gmtime(mtime)
    except (OverflowError, OSError, ValueError):
        # The platform cannot represent this timestamp at all.
        return earliest if mtime < 0 else latest
    stamp = (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)
    # Tuples compare field by field, which is chronological order here.
    return min(max(stamp, earliest), latest)
def create_zip(files, output_path, compute_hashes=True):
    """Write the given files plus a ``_MANIFEST.yaml`` into a new ZIP archive.

    Files are added in sorted relative-path order and the manifest is written
    last, stamped with the newest modification time among the files.

    Returns:
        ``output_path``, or ``None`` when ``files`` is empty (no archive is
        created in that case).
    """
    if not files:
        return None
    ordered = sort_files_by_path(files)
    manifest_entries = []
    newest_mtime = 0
    # A fixed compression level is requested for the archive.
    archive = zipfile.ZipFile(
        output_path,
        mode='w',
        compression=zipfile.ZIP_DEFLATED,
        compresslevel=6,
    )
    with archive:
        for item in ordered:
            newest_mtime = process_single_file(
                item, archive, manifest_entries, newest_mtime, compute_hashes
            )
        add_manifest_to_zip(archive, manifest_entries, newest_mtime)
    return output_path
def sort_files_by_path(files):
    """Return a new list of file records ordered by relative path.

    The sort key is the forward-slash (POSIX) form of ``rel_path`` so the
    order does not depend on the platform's path separator.
    """
    def posix_key(record):
        return record['rel_path'].as_posix()

    ordered = list(files)
    ordered.sort(key=posix_key)
    return ordered
def process_single_file(file_info, zip_file, manifest_data, latest_mtime, compute_hashes):
    """Add one file to the archive and record it in the manifest.

    Args:
        file_info: Record with ``path``, ``rel_path``, ``size`` and ``mtime``.
        zip_file: Open ``zipfile.ZipFile`` to write into.
        manifest_data: List that receives the manifest entry.
        latest_mtime: Newest modification time seen so far.
        compute_hashes: Whether to hash the file for the manifest.

    Returns:
        The updated newest modification time. It only advances once the file
        has actually been written, so a file that failed to be added does not
        influence the manifest timestamp.
    """
    newest = latest_mtime
    try:
        filepath = file_info['path']
        # as_posix() keeps the archive name separator a forward slash.
        arc_name = file_info['rel_path'].as_posix()
        mtime = file_info['mtime']
        # Executable status comes from the extension, not platform permissions.
        is_exec = is_executable_extension(filepath)
        zip_info = create_zip_info(arc_name, mtime, is_executable=is_exec)
        file_data = read_file_data(filepath)
        # writestr() does not apply the archive-level compresslevel when it is
        # handed a ZipInfo, so pass the archive's level along explicitly.
        zip_file.writestr(
            zip_info,
            file_data,
            compresslevel=getattr(zip_file, 'compresslevel', None),
        )
        newest = max(newest, mtime)
        file_hashes = compute_file_hashes_if_needed(filepath, compute_hashes)
        add_to_manifest(manifest_data, arc_name, file_info, file_hashes)
    except Exception as e:
        # Best effort per file: report the failure and continue with the rest.
        print(f"ERROR: failed to add {file_info['path']}: {e}", file=sys.stderr)
    return newest
def create_zip_info(arc_name, mtime, is_executable=False):
    """Build a ``ZipInfo`` whose metadata does not depend on the host platform.

    The entry is DEFLATE-compressed, marked as created on Unix, and given
    fixed permission bits instead of the file's real ones.
    """
    entry = zipfile.ZipInfo(arc_name)
    entry.date_time = mtime_to_ziptime(mtime)
    entry.compress_type = zipfile.ZIP_DEFLATED
    # create_system: 0 = MS-DOS, 3 = Unix. Always claim Unix.
    entry.create_system = 3
    # 0o100000 is S_IFREG (regular file); executables get rwxr-xr-x and
    # everything else rw-r--r--.
    permissions = 0o755 if is_executable else 0o644
    entry.external_attr = (0o100000 | permissions) << 16
    return entry
def read_file_data(filepath):
    """Read the whole file in binary mode and return its bytes."""
    with open(filepath, mode='rb') as stream:
        payload = stream.read()
    return payload
def compute_file_hashes_if_needed(filepath, compute_hashes):
    """Return ``(md5, sha256)`` for the file, or ``(None, None)`` if disabled.

    Delegates to ``compute_file_hashes`` so the file is read once for both
    digests instead of once per algorithm. ``(None, None)`` is also returned
    when the file cannot be read.
    """
    if not compute_hashes:
        return None, None
    return compute_file_hashes(filepath)
def add_to_manifest(manifest_data, arc_name, file_info, file_hashes):
    """Append one file's manifest entry to ``manifest_data``.

    ``file_hashes`` is an ``(md5, sha256)`` pair; a falsy digest is written
    as the string ``'error'``. The size is stored in human-readable form.
    """
    md5_hex, sha256_hex = file_hashes
    entry = {
        'path': arc_name,  # already in POSIX (forward-slash) form
        'size': format_size(file_info['size']),
        'md5': md5_hex or 'error',
        'sha256': sha256_hex or 'error',
    }
    manifest_data.append(entry)
def add_manifest_to_zip(zip_file, manifest_data, latest_mtime):
    """Write ``_MANIFEST.yaml`` describing the archived files into the ZIP.

    Args:
        zip_file: Open ``zipfile.ZipFile`` to write into.
        manifest_data: List of per-file manifest entries.
        latest_mtime: Newest file modification time; used as the manifest's
            timestamp. A value of 0 or less falls back to a fixed date.
    """
    manifest_info = zipfile.ZipInfo('_MANIFEST.yaml')
    if latest_mtime > 0:
        manifest_info.date_time = mtime_to_ziptime(latest_mtime)
    else:
        # Fixed fallback timestamp.
        manifest_info.date_time = (2000, 1, 1, 0, 0, 0)
    manifest_info.compress_type = zipfile.ZIP_DEFLATED
    # Same platform-independent attributes as the regular entries.
    manifest_info.create_system = 3  # Unix
    manifest_info.external_attr = 0o100644 << 16  # regular file, rw-r--r--
    manifest_yaml = yaml.dump(
        {'files': manifest_data},
        allow_unicode=True,
        sort_keys=False,
        default_flow_style=False
    )
    # Normalize any CRLF / CR line endings to LF.
    manifest_yaml = manifest_yaml.replace('\r\n', '\n').replace('\r', '\n')
    # writestr() does not apply the archive-level compresslevel when it is
    # handed a ZipInfo, so pass the archive's level along explicitly.
    zip_file.writestr(
        manifest_info,
        manifest_yaml.encode('utf-8'),
        compresslevel=getattr(zip_file, 'compresslevel', None),
    )
def print_results(files, stats, output_path=None):
    """Print the scan summary and, if an archive exists, its size and hashes.

    Args:
        files: Matched file records (each with ``size`` and ``ext``).
        stats: Counters produced by the scan.
        output_path: Path of the created archive, or ``None`` to omit the
            output-file section.
    """
    print("\n--- Scan Results ---")
    print(f"Directories scanned: {stats['dirs']}")
    print(f"Files scanned: {stats['files']}")
    print(f"Files matched: {stats['matched']}")
    if stats['skipped_size']:
        print(f"Skipped (too large): {stats['skipped_size']}")
    if stats['skipped_err']:
        print(f"Skipped (errors): {stats['skipped_err']}")

    if files:
        total_size = sum(item['size'] for item in files)
        print(f"Total size: {format_size(total_size)}")
        # Tally matches per extension.
        per_extension = {}
        for item in files:
            label = item['ext'] or '(none)'
            per_extension[label] = per_extension.get(label, 0) + 1
        print("\nBy extension:")
        # Most frequent first; ties keep first-seen order (stable sort).
        ranked = sorted(per_extension.items(), key=lambda pair: pair[1], reverse=True)
        for label, total in ranked:
            print(f" {label}: {total}")

    if not output_path or not output_path.exists():
        return
    print("\n--- Output File ---")
    print(f"Path: {output_path}")
    print(f"Size: {format_size(output_path.stat().st_size)}")
    md5, sha256 = compute_file_hashes(output_path)
    if md5:
        print(f"MD5: {md5}")
    if sha256:
        print(f"SHA256: {sha256}")
def list_files(files):
    """Print a table of the matched files, ordered by relative path.

    Prints ``No files found.`` when the list is empty.
    """
    if not files:
        print("No files found.")
        return
    print(f"\n{'Path':<60} {'Size':>10} {'Ext':>8}")
    print("-" * 80)
    ordered = sorted(files, key=lambda item: item['rel_path'].as_posix())
    for item in ordered:
        print(f"{str(item['rel_path']):<60} {format_size(item['size']):>10} {item['ext']:>8}")
def main():
    """Command-line entry point: scan a directory and optionally build the ZIP.

    Parses the arguments, scans the given directory for matching files and,
    unless ``--list-only`` or ``--show-extensions`` is given, packs the
    matches into a ZIP archive and prints a summary.

    Returns:
        Process exit status: 1 when the directory argument does not exist or
        is not a directory, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description='Extract executables/scripts/DLLs for virus scanning.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s /path/to/scan
%(prog)s /path/to/scan -o scan_result.zip
%(prog)s . --categories windows scripts_win
%(prog)s . --max-size 50 --no-recursive
%(prog)s . --exclude-dir node_modules --exclude-dir .git
%(prog)s . --list-only
%(prog)s . --check-executable
Categories:
windows Windows executables (.exe .dll .sys .ocx ...)
scripts_win Windows scripts (.bat .cmd .ps1 .vbs .hta ...)
linux Linux/Unix executables (.sh .so .ko .bin ...)
macos macOS files (.dylib .pkg .dmg .command ...)
cross_platform Cross-platform scripts (.py .pl .rb .jar ...)
office_macro Office macro files (.docm .xlsm .pptm ...)
other Other risky files (.lnk .reg .chm .iso ...)
Recommended online scanners:
VirusTotal: https://www.virustotal.com/
Hybrid Analysis: https://www.hybrid-analysis.com/
MetaDefender: https://metadefender.opswat.com/
"""
    )
    parser.add_argument('directory', help='directory to scan')
    parser.add_argument('-o', '--output', help='output ZIP file path')
    parser.add_argument('-c', '--categories', nargs='+',
                        choices=list(EXTENSIONS.keys()),
                        metavar='CAT',
                        help='file categories to include (default: all)')
    parser.add_argument('-m', '--max-size', type=int, default=100,
                        help='max file size in MB (default: 100)')
    parser.add_argument('-e', '--exclude-dir', action='append', dest='exclude_dirs',
                        metavar='DIR',
                        help='directory name to exclude (can be used multiple times)')
    parser.add_argument('--no-recursive', action='store_true',
                        help='do not scan subdirectories')
    parser.add_argument('--no-hash', action='store_true',
                        help='do not compute MD5 hashes for files in manifest')
    parser.add_argument('--list-only', action='store_true',
                        help='list files only, do not create ZIP')
    parser.add_argument('--check-executable', action='store_true',
                        help='include executable files without extension (Unix)')
    parser.add_argument('--show-extensions', action='store_true',
                        help='show all supported extensions and exit')
    args = parser.parse_args()

    # Show the extension table and stop.
    if args.show_extensions:
        for cat, exts in sorted(EXTENSIONS.items()):
            print(f"\n[{cat}]")
            print(f" {' '.join(sorted(exts))}")
        return 0

    # Validate the directory argument.
    source_dir = Path(args.directory).resolve()
    if not source_dir.exists():
        print(f"ERROR: directory not found: {source_dir}", file=sys.stderr)
        return 1
    if not source_dir.is_dir():
        print(f"ERROR: not a directory: {source_dir}", file=sys.stderr)
        return 1

    # Resolve the scan options. --max-size is given in MB; 0 disables it.
    extensions = get_all_extensions(args.categories)
    max_size = args.max_size * 1024 * 1024 if args.max_size else None
    exclude_dirs = set(args.exclude_dirs) if args.exclude_dirs else set()
    print(f"Scanning: {source_dir}")
    print(f"Categories: {', '.join(args.categories) if args.categories else 'all'}")
    if exclude_dirs:
        print(f"Excluding: {', '.join(sorted(exclude_dirs))}")

    # Scan.
    files, stats = scan_directory(
        source_dir,
        extensions,
        recursive=not args.no_recursive,
        exclude_dirs=exclude_dirs,
        max_size=max_size,
        check_exec=args.check_executable,
    )

    # Listing mode: report and stop without writing an archive.
    if args.list_only:
        list_files(files)
        print_results(files, stats)
        return 0

    # Create the ZIP.
    if not files:
        print("No matching files found.")
        return 0
    if args.output:
        output_path = Path(args.output)
    else:
        # No -o given: derive a name from the current local time.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = Path(f"executables_{timestamp}.zip")
    print(f"Creating ZIP: {output_path}")
    create_zip(files, output_path, compute_hashes=not args.no_hash)
    print_results(files, stats, output_path)
    return 0
if __name__ == '__main__':
sys.exit(main())

声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得UP主同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理: DMCA投诉/Report












这个up主感受到了孤独