4837 Total CVEs
26 Years
GitHub
README.md
Rendering markdown...
POC / safe_extract.py PY
"""
Safe archive auditor and extractor for RAR/ZIP files.
- Flags dangerous paths (.. traversal, absolute paths, UNC, weird prefixes)
- Optionally extracts only safe entries, preserving directory structure
Requires:
  pip install rarfile
  (and UnRAR/Unarchiver installed on your system for rarfile to read .rar)
"""

import os
import shutil
import sys
import zipfile
from pathlib import Path

try:
    import rarfile  # pip install rarfile
    HAS_RARFILE = True
except Exception:
    HAS_RARFILE = False


DANGEROUS_PREFIXES = (
    "/", "\\",               # absolute on *nix or root-like on Windows
    "C:\\", "D:\\",          # legacy entries; any drive letter is rejected in is_dangerous_path
)
UNC_PREFIX = ("\\\\",)       # Windows UNC paths

# Names Windows reserves for devices. They are matched per path component and
# with any extension stripped, so "docs/NUL.txt" is rejected as well as "nul".
_WINDOWS_RESERVED_NAMES = frozenset(
    ["con", "prn", "aux", "nul"]
    + [f"com{n}" for n in range(1, 10)]
    + [f"lpt{n}" for n in range(1, 10)]
)


def is_dangerous_path(member_name: str) -> bool:
    """Return True if an archive member name should not be extracted.

    The check is purely lexical (no filesystem access) and gives the same
    answer on every platform. A name is rejected when it:

    - contains a control character (code point below 32);
    - starts with a slash or backslash, a UNC prefix, or any entry of
      ``DANGEROUS_PREFIXES``;
    - starts with a drive qualifier such as ``C:`` (any letter, with or
      without a following separator) or the literal ``?:/``;
    - has a ``..`` component anywhere;
    - has a component whose base name (text before the first ``.`` or ``:``)
      is a Windows reserved device name.

    Args:
        member_name: The path stored in the archive for one entry.

    Returns:
        True if the name is considered unsafe, False otherwise.
    """
    # Control characters (NUL, newlines, ...) have no place in a member name.
    if any(ord(ch) < 32 for ch in member_name):
        return True

    # Compare on forward slashes so both separator styles are covered.
    norm = member_name.replace("\\", "/")

    # Absolute, root-relative and UNC / device-namespace paths. The raw name
    # is checked too so backslash-style prefixes in the constants still match.
    if member_name.startswith(DANGEROUS_PREFIXES + UNC_PREFIX):
        return True
    if norm.startswith(DANGEROUS_PREFIXES):
        return True

    # Drive-qualified paths: "C:/x" is absolute and "C:x" is relative to the
    # drive's current directory; neither stays under the extraction root.
    if len(norm) >= 2 and norm[1] == ":" and norm[0].isalpha():
        return True
    if norm.startswith("?:/"):
        return True

    for part in norm.split("/"):
        if part in ("", "."):
            continue
        # Reject traversal at any position.
        if part == "..":
            return True
        # Reserved device names apply to a whole component, not to any name
        # that merely begins with the same letters (e.g. "config" is fine).
        stem = part.split(".", 1)[0].split(":", 1)[0].rstrip(" ").lower()
        if stem in _WINDOWS_RESERVED_NAMES:
            return True

    return False


def safe_join(base_dir: Path, member_name: str) -> Path:
    """Join ``member_name`` onto ``base_dir`` and verify containment.

    Both the joined path and ``base_dir`` are resolved, and the joined path
    must be relative to the resolved base.

    Args:
        base_dir: Directory that extracted files must stay inside.
        member_name: Archive member path to place under ``base_dir``.

    Returns:
        The joined (unresolved) path ``base_dir / member_name``.

    Raises:
        ValueError: If resolving or the containment check fails for any reason.
    """
    candidate = base_dir / member_name
    try:
        resolved_root = base_dir.resolve()
        resolved_candidate = candidate.resolve()
        # relative_to raises when the candidate is not below the root.
        resolved_candidate.relative_to(resolved_root)
    except Exception:
        raise ValueError(f"path escapes extraction dir: {member_name}")
    else:
        return candidate


def audit_zip(path: Path):
    """Classify every entry of a ZIP archive without extracting anything.

    Args:
        path: Location of the ZIP file to inspect.

    Returns:
        A list of ``(status, name)`` tuples in archive order, where ``status``
        is ``"danger"`` when ``is_dangerous_path`` flags the name and ``"ok"``
        otherwise.
    """
    with zipfile.ZipFile(path) as archive:
        return [
            ("danger" if is_dangerous_path(entry.filename) else "ok", entry.filename)
            for entry in archive.infolist()
        ]


def extract_zip_safe(path: Path, dest: Path):
    """Extract the safe entries of a ZIP archive into ``dest``.

    Entries flagged by ``is_dangerous_path`` and entries that ``safe_join``
    reports as escaping ``dest`` are skipped with a ``[SKIP]`` message, so one
    bad entry does not abort the rest of the extraction. ``dest`` is created
    if it does not exist.

    Args:
        path: Location of the ZIP file to extract.
        dest: Directory that receives the extracted entries.
    """
    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(path) as zf:
        for info in zf.infolist():
            name = info.filename
            if is_dangerous_path(name):
                print(f"[SKIP] dangerous path in ZIP: {name}")
                continue
            try:
                out_path = safe_join(dest, name)
            except ValueError:
                # Second line of defence: skip the entry instead of aborting.
                print(f"[SKIP] path escapes extraction dir in ZIP: {name}")
                continue
            if name.endswith("/"):
                out_path.mkdir(parents=True, exist_ok=True)
                continue
            out_path.parent.mkdir(parents=True, exist_ok=True)
            with zf.open(info, "r") as src, open(out_path, "wb") as dst:
                # Stream in chunks so a huge member is never held in memory.
                shutil.copyfileobj(src, dst)


def audit_rar(path: Path):
    """Classify every entry of a RAR archive without extracting anything.

    Args:
        path: Location of the RAR file to inspect.

    Returns:
        A list of ``(status, name)`` tuples in archive order, where ``status``
        is ``"danger"`` when ``is_dangerous_path`` flags the name and ``"ok"``
        otherwise.

    Raises:
        RuntimeError: If the optional ``rarfile`` module could not be imported.
    """
    if not HAS_RARFILE:
        raise RuntimeError("rarfile module not available. pip install rarfile")
    with rarfile.RarFile(path) as archive:
        return [
            ("danger" if is_dangerous_path(entry.filename) else "ok", entry.filename)
            for entry in archive.infolist()
        ]


def extract_rar_safe(path: Path, dest: Path):
    """Extract the safe entries of a RAR archive into ``dest``.

    Entries flagged by ``is_dangerous_path`` and entries that ``safe_join``
    reports as escaping ``dest`` are skipped with a ``[SKIP]`` message, so one
    bad entry does not abort the rest of the extraction. ``dest`` is created
    if it does not exist.

    Args:
        path: Location of the RAR file to extract.
        dest: Directory that receives the extracted entries.

    Raises:
        RuntimeError: If the optional ``rarfile`` module could not be imported.
    """
    if not HAS_RARFILE:
        raise RuntimeError("rarfile module not available. pip install rarfile")
    dest.mkdir(parents=True, exist_ok=True)
    with rarfile.RarFile(path) as rf:
        for info in rf.infolist():
            name = info.filename
            if is_dangerous_path(name):
                print(f"[SKIP] dangerous path in RAR: {name}")
                continue
            try:
                out_path = safe_join(dest, name)
            except ValueError:
                # Second line of defence: skip the entry instead of aborting.
                print(f"[SKIP] path escapes extraction dir in RAR: {name}")
                continue
            if info.isdir():
                out_path.mkdir(parents=True, exist_ok=True)
                continue
            out_path.parent.mkdir(parents=True, exist_ok=True)
            with rf.open(info, "r") as src, open(out_path, "wb") as dst:
                # Stream in chunks so a huge member is never held in memory.
                shutil.copyfileobj(src, dst)


def main():
    """Command-line entry point: ``<audit|extract> <archive> [dest]``.

    The archive type is chosen from the file suffix (``.zip`` or ``.rar``).
    ``audit`` prints one ``[OK]`` / ``[!!]`` line per entry; ``extract`` writes
    the safe entries to ``dest``, defaulting to the archive path without its
    suffix. Every error path (too few arguments, unsupported suffix, unknown
    command) prints a message and exits with status 1.
    """
    if len(sys.argv) < 3:
        print("Usage: python safe_extract.py <audit|extract> <archive> [dest]")
        sys.exit(1)

    cmd = sys.argv[1].lower()
    archive = Path(sys.argv[2])

    # Pick the format-specific functions once so both commands share one path.
    suffix = archive.suffix.lower()
    if suffix == ".zip":
        label, audit, extract = "ZIP", audit_zip, extract_zip_safe
    elif suffix == ".rar":
        label, audit, extract = "RAR", audit_rar, extract_rar_safe
    else:
        print("Unsupported archive type. Use .zip or .rar")
        sys.exit(1)

    if cmd == "audit":
        for status, name in audit(archive):
            tag = "!!" if status == "danger" else "OK"
            print(f"[{tag}] {name}")
    elif cmd == "extract":
        # Default destination: the archive path with its suffix removed.
        dest = Path(sys.argv[3]) if len(sys.argv) > 3 else archive.with_suffix("")
        extract(archive, dest)
        print(f"Extracted safe {label} entries to: {dest}")
    else:
        print("Unknown command")
        sys.exit(1)


if __name__ == "__main__":
    main()