# README.md
# Rendering markdown...
#!/usr/bin/env python3
"""
Simple PDF reader to test pypdf behavior for vulnerability CVE-2026-24688.
Usage:
python simple_read_pdf.py <path_to_pdf>
This will attempt to:
1. Open the PDF
2. Access basic metadata
3. Access outline/bookmarks (THIS is where circular ref vulnerability triggers)
4. Extract text from first page
⚠️ WARNING: If PDF has circular outline references, this will hang!
"""
import sys
import time
from pathlib import Path
# Put this script's directory first on sys.path so a pypdf package located next to it is the one imported
sys.path.insert(0, str(Path(__file__).parent))
from pypdf import PdfReader
def read_pdf(pdf_path: str) -> int:
    """
    Read a PDF with pypdf and print a step-by-step report to stdout.

    The steps are: open the file, print basic metadata, read the
    outline/bookmarks, and preview the text of the first page. Reading the
    outline is the step this script exists to exercise; per the messages
    printed below, a PDF with circular outline references is expected to hang
    there (until Ctrl+C) or to end in a RecursionError.

    Args:
        pdf_path: Path to PDF file

    Returns:
        0 when all four steps ran to the end. 1 when the file does not exist,
        cannot be opened, or the outline step raises or is interrupted.
        Failures in the metadata and text-extraction steps are printed as
        warnings and do not change the return value.
    """
    # Keep the caller's string untouched and work with a Path locally.
    path = Path(pdf_path)
    if not path.exists():
        print(f"❌ Error: File not found: {path}")
        return 1

    print("=" * 70)
    print(f"📄 Reading PDF: {path.name}")
    print("=" * 70)
    print()

    # Step 1: Open PDF
    print("Step 1: Opening PDF...")
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    try:
        reader = PdfReader(str(path))
        elapsed = time.perf_counter() - start
        print(f"✅ Opened successfully ({elapsed:.3f}s)")
    except Exception as e:
        print(f"❌ Failed to open: {e}")
        return 1
    print()

    # Step 2: Basic metadata (best effort: a failure here is only a warning)
    print("Step 2: Reading metadata...")
    try:
        metadata = reader.metadata
        if metadata:
            print(f" Title: {metadata.get('/Title', 'N/A')}")
            print(f" Author: {metadata.get('/Author', 'N/A')}")
            print(f" Subject: {metadata.get('/Subject', 'N/A')}")
            print(f" Creator: {metadata.get('/Creator', 'N/A')}")
        else:
            print(" No metadata found")
        print(f" Pages: {len(reader.pages)}")
        print(f" Encrypted: {reader.is_encrypted}")
    except Exception as e:
        print(f"⚠️ Warning: {e}")
    print()

    # Step 3: Outline (THIS IS WHERE VULNERABILITY TRIGGERS!)
    print("Step 3: Reading outline/bookmarks...")
    print("⚠️ THIS IS WHERE CIRCULAR REFERENCE VULNERABILITY TRIGGERS!")
    print("⏳ If this hangs, you'll need to Ctrl+C to kill it...")
    print()
    start = time.perf_counter()
    try:
        outline = reader.outline
        # Measured once, right after the access, and reused by both branches.
        elapsed = time.perf_counter() - start
        if outline:
            print(f"✅ Outline read successfully ({elapsed:.3f}s)")
            print(f" Bookmark count: {len(outline)}")
            # Show first few bookmarks
            print("\n First few bookmarks:")
            for i, item in enumerate(outline[:5], start=1):
                # A list entry is a nested outline level, not a bookmark.
                if isinstance(item, list):
                    print(f" [{i}] (nested outline)")
                else:
                    title = item.get('/Title', 'Untitled')
                    print(f" [{i}] {title}")
            if len(outline) > 5:
                print(f" ... and {len(outline) - 5} more")
        else:
            print(f"✅ No outline/bookmarks ({elapsed:.3f}s)")
    except KeyboardInterrupt:
        # Ctrl+C during the outline read is reported as the hang this script
        # is meant to demonstrate.
        print()
        print()
        print("=" * 70)
        print("❌ KILLED BY USER (Ctrl+C)")
        print("=" * 70)
        print()
        print("🔥 This PDF has CIRCULAR OUTLINE REFERENCES!")
        print(" The code was stuck in an infinite loop.")
        print()
        print("This demonstrates the vulnerability:")
        print(" Location: pypdf/_doc_common.py:858-873")
        print(" Issue: No cycle detection in outline traversal")
        print(" Impact: Denial of Service (infinite loop)")
        print()
        return 1
    except RecursionError as e:
        elapsed = time.perf_counter() - start
        print(f"❌ RecursionError after {elapsed:.3f}s: {e}")
        print()
        print("🔥 This PDF has NESTED CIRCULAR REFERENCES!")
        print(" The code exceeded Python's recursion limit.")
        print()
        return 1
    except Exception as e:
        elapsed = time.perf_counter() - start
        print(f"⚠️ Error after {elapsed:.3f}s: {e}")
        return 1
    print()

    # Step 4: Extract text from first page (best effort, like step 2)
    print("Step 4: Extracting text from first page...")
    try:
        if len(reader.pages) > 0:
            first_page = reader.pages[0]
            text = first_page.extract_text()
            if text:
                print(f"✅ Text extracted ({len(text)} characters)")
                print("\n First 200 characters:")
                print(" " + "-" * 66)
                # Indent continuation lines so the preview lines up with the
                # rest of the report.
                preview = text[:200].replace('\n', '\n ')
                print(f" {preview}")
                if len(text) > 200:
                    print(" ...")
                print(" " + "-" * 66)
            else:
                print(" No text found on first page")
        else:
            print(" No pages in PDF")
    except Exception as e:
        print(f"⚠️ Warning: {e}")
    print()

    print("=" * 70)
    print("✅ PDF read successfully - No vulnerabilities detected")
    print("=" * 70)
    return 0
def main() -> int:
    """
    Command-line entry point.

    With no PDF path on the command line, print the usage text and return 1.
    Otherwise pass the first argument to read_pdf() and return its result;
    any further arguments are ignored.

    Returns:
        The process exit code: 1 for missing arguments, else whatever
        read_pdf() returns.
    """
    if len(sys.argv) < 2:
        print("Simple PDF Reader (pypdf)")
        print()
        print("Usage:")
        print(" python simple_read_pdf.py <path_to_pdf>")
        print()
        print("Examples:")
        print(" python simple_read_pdf.py document.pdf")
        print(" python simple_read_pdf.py /path/to/file.pdf")
        print()
        print("To test the circular reference vulnerability:")
        print(" python simple_read_pdf.py malicious_circular_outline.pdf")
        print()
        print("⚠️ WARNING: Malicious PDFs will hang! Use Ctrl+C to kill.")
        print()
        return 1

    pdf_path = sys.argv[1]
    return read_pdf(pdf_path)
if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # Ctrl+C that main() did not handle itself: print a short banner
        # instead of a traceback and exit with a failure status.
        print()
        print()
        print("=" * 70)
        print("⚠️ INTERRUPTED BY USER")
        print("=" * 70)
        sys.exit(1)