README.md
Rendering markdown...
"""
Safe archive auditor and extractor for RAR/ZIP files.
- Flags dangerous paths (.. traversal, absolute paths, UNC, weird prefixes)
- Optionally extracts only safe entries, preserving directory structure
Optional dependency (needed only for .rar archives; .zip uses the standard library):
pip install rarfile
(plus an UnRAR/Unarchiver tool installed on your system so rarfile can read .rar)
"""
import os
import shutil
import sys
import zipfile
from pathlib import Path
try:
import rarfile # pip install rarfile
HAS_RARFILE = True
except Exception:
HAS_RARFILE = False
DANGEROUS_PREFIXES = (
    "/", "\\",  # absolute on *nix or root-like on Windows
    "C:\\", "D:\\",  # absolute Windows drives (any drive letter is also rejected generically)
)
UNC_PREFIX = ("\\\\",)  # Windows UNC paths

# Device names that Windows reserves in every directory, with or without an extension.
_WINDOWS_RESERVED_NAMES = frozenset(
    {"con", "prn", "aux", "nul"}
    | {f"com{i}" for i in range(1, 10)}
    | {f"lpt{i}" for i in range(1, 10)}
)

_ASCII_LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"


def is_dangerous_path(member_name: str) -> bool:
    """Return True when an archive member name is unsafe to extract.

    A name is considered dangerous when any of the following holds:

    * it contains a control character (code point below 32);
    * it starts with one of ``DANGEROUS_PREFIXES`` or ``UNC_PREFIX``;
    * it is rooted once backslashes are treated as separators (this also
      covers UNC and ``\\\\?\\`` / ``\\\\.\\`` device paths);
    * it starts with a drive specifier such as ``C:`` (any ASCII letter,
      absolute or drive-relative);
    * any component is ``..``;
    * any component is a Windows reserved device name (``con``, ``nul``,
      ``com1`` ...), optionally followed by an extension.

    Args:
        member_name: The member path exactly as stored in the archive.

    Returns:
        True if the entry should be skipped, False if it looks safe.
    """
    if any(ord(ch) < 32 for ch in member_name):
        return True

    # The configured prefixes are matched against the raw name so that
    # backslash-based entries such as "C:\\" can actually match.
    if member_name.startswith(DANGEROUS_PREFIXES + UNC_PREFIX):
        return True

    # Treat both separator styles alike from here on.
    norm = member_name.replace("\\", "/")
    if norm.startswith("/") or norm.startswith(DANGEROUS_PREFIXES):
        return True

    # Any drive specifier ("e:/x", "C:x"), not only the listed drives.
    if len(norm) >= 2 and norm[1] == ":" and norm[0] in _ASCII_LETTERS:
        return True

    for part in norm.split("/"):
        if part in ("", "."):
            continue
        if part == "..":
            return True
        # Reserved names apply per component and ignore anything after the
        # first dot ("aux.txt", "docs/NUL.tar.gz").
        stem = part.split(".", 1)[0].rstrip(" ").lower()
        if stem in _WINDOWS_RESERVED_NAMES:
            return True

    return False
def safe_join(base_dir: Path, member_name: str) -> Path:
    """Join *member_name* onto *base_dir*, refusing results outside *base_dir*.

    Both the joined path and the base directory are resolved before the
    containment check; the path that is returned is the unresolved join.

    Args:
        base_dir: Directory that extracted files must stay under.
        member_name: Relative member path taken from the archive.

    Returns:
        ``base_dir / member_name``.

    Raises:
        ValueError: If resolving or the containment check fails for any reason.
    """
    candidate = base_dir / member_name
    try:
        resolved_candidate = candidate.resolve()
        resolved_root = base_dir.resolve()
        # relative_to() raises when the candidate is not below the root.
        resolved_candidate.relative_to(resolved_root)
    except Exception:
        raise ValueError(f"path escapes extraction dir: {member_name}")
    else:
        return candidate
def audit_zip(path: Path):
    """Classify every member of the ZIP archive at *path*.

    Args:
        path: Location of the ZIP file to inspect.

    Returns:
        A list of ``(status, name)`` tuples in archive order, where status is
        ``"danger"`` when ``is_dangerous_path`` rejects the name and ``"ok"``
        otherwise.
    """
    with zipfile.ZipFile(path) as archive:
        member_names = [entry.filename for entry in archive.infolist()]
    return [
        ("danger" if is_dangerous_path(member) else "ok", member)
        for member in member_names
    ]
def extract_zip_safe(path: Path, dest: Path):
    """Extract the safe members of the ZIP archive at *path* into *dest*.

    Members rejected by ``is_dangerous_path`` or by ``safe_join`` are skipped
    with a ``[SKIP]`` message instead of aborting the whole extraction.
    File contents are streamed to disk rather than loaded fully into memory.

    Args:
        path: Location of the ZIP file.
        dest: Directory to extract into; created if missing.
    """
    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(path) as zf:
        for info in zf.infolist():
            name = info.filename
            if is_dangerous_path(name):
                print(f"[SKIP] dangerous path in ZIP: {name}")
                continue
            try:
                out_path = safe_join(dest, name)
            except ValueError as exc:
                # One bad entry should not stop the remaining safe entries.
                print(f"[SKIP] {exc}")
                continue
            if info.is_dir():
                out_path.mkdir(parents=True, exist_ok=True)
                continue
            out_path.parent.mkdir(parents=True, exist_ok=True)
            with zf.open(info, "r") as src, open(out_path, "wb") as dst:
                # Chunked copy keeps memory bounded for very large members.
                shutil.copyfileobj(src, dst)
def audit_rar(path: Path):
    """Classify every member of the RAR archive at *path*.

    Args:
        path: Location of the RAR file to inspect.

    Returns:
        A list of ``(status, name)`` tuples in archive order, where status is
        ``"danger"`` when ``is_dangerous_path`` rejects the name and ``"ok"``
        otherwise.

    Raises:
        RuntimeError: If the optional ``rarfile`` module could not be imported.
    """
    if not HAS_RARFILE:
        raise RuntimeError("rarfile module not available. pip install rarfile")
    with rarfile.RarFile(path) as archive:
        member_names = [entry.filename for entry in archive.infolist()]
    return [
        ("danger" if is_dangerous_path(member) else "ok", member)
        for member in member_names
    ]
def extract_rar_safe(path: Path, dest: Path):
    """Extract the safe members of the RAR archive at *path* into *dest*.

    Members rejected by ``is_dangerous_path`` or by ``safe_join`` are skipped
    with a ``[SKIP]`` message instead of aborting the whole extraction.
    File contents are streamed to disk rather than loaded fully into memory.

    Args:
        path: Location of the RAR file.
        dest: Directory to extract into; created if missing.

    Raises:
        RuntimeError: If the optional ``rarfile`` module could not be imported.
    """
    if not HAS_RARFILE:
        raise RuntimeError("rarfile module not available. pip install rarfile")
    dest.mkdir(parents=True, exist_ok=True)
    with rarfile.RarFile(path) as rf:
        for info in rf.infolist():
            name = info.filename
            if is_dangerous_path(name):
                print(f"[SKIP] dangerous path in RAR: {name}")
                continue
            try:
                out_path = safe_join(dest, name)
            except ValueError as exc:
                # One bad entry should not stop the remaining safe entries.
                print(f"[SKIP] {exc}")
                continue
            if info.isdir():
                out_path.mkdir(parents=True, exist_ok=True)
                continue
            out_path.parent.mkdir(parents=True, exist_ok=True)
            with rf.open(info, "r") as src, open(out_path, "wb") as dst:
                # Chunked copy keeps memory bounded for very large members.
                shutil.copyfileobj(src, dst)
def main():
    """Command-line entry point: ``<audit|extract> <archive> [dest]``.

    ``audit`` prints one ``[OK]`` / ``[!!]`` line per archive member.
    ``extract`` writes the safe members to *dest*, which defaults to the
    archive path with its suffix removed.

    Exits with status 1 on a usage error, an unsupported archive suffix, or
    an unknown command.
    """
    if len(sys.argv) < 3:
        print("Usage: python safe_extract.py <audit|extract> <archive> [dest]")
        sys.exit(1)

    cmd = sys.argv[1].lower()
    archive = Path(sys.argv[2])
    suffix = archive.suffix.lower()

    # Validate before touching the archive; the suffix check comes first so
    # an unsupported type is reported even when the command is also unknown.
    if suffix not in (".zip", ".rar"):
        print("Unsupported archive type. Use .zip or .rar")
        sys.exit(1)
    if cmd not in ("audit", "extract"):
        print("Unknown command")
        sys.exit(1)

    if suffix == ".zip":
        label, audit, extract = "ZIP", audit_zip, extract_zip_safe
    else:
        label, audit, extract = "RAR", audit_rar, extract_rar_safe

    if cmd == "audit":
        for status, name in audit(archive):
            tag = "!!" if status == "danger" else "OK"
            print(f"[{tag}] {name}")
    else:
        dest = Path(sys.argv[3]) if len(sys.argv) > 3 else archive.with_suffix("")
        extract(archive, dest)
        print(f"Extracted safe {label} entries to: {dest}")


if __name__ == "__main__":
    main()