README.md
Rendering markdown...
# generate_report.cmake - Generate summary report of bug reproduction
message("Generating comprehensive bug reproduction report...")

# Take the timestamp up front so the whole report preamble can be emitted
# with a single write.
string(TIMESTAMP current_time "%Y-%m-%d %H:%M:%S")

# Start report.md from scratch (WRITE truncates any previous report):
# title, generation time, bug summary, and a horizontal rule.
# Multiple content arguments are concatenated in order.
file(WRITE "report.md"
  "# TAR PAX Size Override Bug Reproduction Report\n\n"
  "Generated: "
  "${current_time}\n\n"
  "## Bug Summary\n\n"
  "This reproduction demonstrates a critical bug in tokio-tar and related async tar libraries:\n\n"
  "**Root Cause:** PAX extended header `size` overrides are not applied before calculating the next header position.\n\n"
  "**Impact:** When processing tar files with large files (>8GB) that require PAX extensions:\n"
  "- The library uses the wrong file size (0 from octal field instead of PAX override)\n"
  "- Position calculation becomes incorrect\n"
  "- Library jumps into file content and mistakes it for tar headers\n"
  "- Extracts wrong files from \"inner\" content instead of continuing outer tar\n\n"
  "---\n\n"
)
# Append one "captured tool output" section to report.md.
#
# The section consists of a heading, a one-line introduction, and a fenced
# code block holding the content of logs/<log_name>. When the log file is
# missing, an ERROR line is placed inside the fence instead.
#
# Arguments:
#   heading  - Markdown heading line (without trailing newlines).
#   intro    - Introductory sentence placed under the heading.
#   log_name - File name of the log inside the logs/ directory.
#
# Example:
#   report_append_log_section("## Title" "Some intro:" "tool_output.txt")
function(report_append_log_section heading intro log_name)
  # if(EXISTS) is only well-defined for full paths. Anchor the log to
  # CMAKE_CURRENT_SOURCE_DIR, which is the base file(READ)/file(APPEND)
  # already use for relative paths, so the check and the read always agree.
  set(log_path "${CMAKE_CURRENT_SOURCE_DIR}/logs/${log_name}")

  file(APPEND "report.md"
    "${heading}\n\n"
    "${intro}\n\n"
    "```\n"
  )

  if(EXISTS "${log_path}")
    file(READ "${log_path}" log_text)
    file(APPEND "report.md" "${log_text}")
    # A log without a final newline would glue the closing fence onto its
    # last line and leave the code block unterminated.
    if(NOT log_text STREQUAL "" AND NOT log_text MATCHES "\n$")
      file(APPEND "report.md" "\n")
    endif()
  else()
    file(APPEND "report.md" "ERROR: ${log_name} not found\n")
  endif()

  file(APPEND "report.md" "```\n\n")
endfunction()

# Add GNU tar output
report_append_log_section(
  "## GNU tar Output (Baseline)"
  "What the system `tar` command sees in the reproduction file:"
  "tar_tvf_output.txt"
)

# Add correct tarwalk output
report_append_log_section(
  "## Correct TAR Parser Output"
  "Output from a TAR parser that correctly handles PAX extensions:"
  "tarwalk_correct.txt"
)

# Add buggy tarwalk output
report_append_log_section(
  "## Buggy TAR Parser Output"
  "Output from a TAR parser with the same bug as tokio-tar:"
  "tarwalk_buggy.txt"
)

# Add Rust comparison output
report_append_log_section(
  "## Rust Library Comparison"
  "Direct comparison between `tar` crate (correct) and `tokio-tar` (buggy):"
  "rust_comparison.txt"
)
# Add technical analysis
# Static text only: the step-by-step bug sequence, why Docker images are
# affected, and a Rust sketch of the fix. Emitted with a single append whose
# content arguments are concatenated in order.
file(APPEND "report.md"
  "## Technical Analysis\n\n"

  "### The Bug Sequence\n\n"
  "1. **PAX Extended Header**: Contains `size=1024` override\n"
  "2. **Following File Header**: Has octal size field = `000000000000` (zero)\n"
  "3. **Correct Behavior**: Use PAX size (1024), skip 1024 bytes + padding\n"
  "4. **Buggy Behavior**: Use octal size (0), skip 0 bytes\n"
  "5. **Result**: Land in file content, mistake fake tar header for real entry\n\n"

  "### Why This Affects Docker Images\n\n"
  "- Docker layers often exceed 8GB (ustar octal size limit)\n"
  "- Tools write PAX `size` override with zero in octal field\n"
  "- Layer content starts with filesystem tar headers (`etc/`, `usr/`, etc.)\n"
  "- Buggy parsers extract filesystem content instead of continuing with image manifest\n\n"

  "### The Fix\n\n"
  "Libraries must apply PAX overrides **before** position calculations:\n\n"
  "```rust\n"
  "// Read header\n"
  "let mut file_size = header.size();\n"
  "\n"
  "// Apply PAX overrides BEFORE calculating next position\n"
  "if let Some(pax_size) = pending_pax.get(\"size\") {\n"
  " file_size = pax_size.parse().unwrap();\n"
  "}\n"
  "\n"
  "// Now calculate next header position using effective size\n"
  "let next_pos = current_pos + 512 + pad_to_512(file_size);\n"
  "```\n\n"
)
# Add files information
# Closing part of report.md: the list of reproduction artifacts followed by
# the build/test instructions, written with one append.
file(APPEND "report.md"
  "## Reproduction Files\n\n"
  "- `pax_bug_compact.tar` - Minimal reproduction case\n"
  "- `logs/` - Detailed output from each tool\n\n"

  "## Build and Test Instructions\n\n"
  "```bash\n"
  "mkdir build && cd build\n"
  "cmake ..\n"
  "make reproduce_bug\n"
  "```\n\n"
  "The complete reproduction will be in the `output/` directory.\n"
)

# report.md is complete at this point.
message("Report generated: report.md")
# Also create a simple text summary
# RESULTS.txt is rewritten from scratch and carries a short verdict derived
# from the Rust comparison log.
file(WRITE "RESULTS.txt"
  "TAR PAX BUG REPRODUCTION RESULTS\n"
  "===================================\n\n"
)

# if(EXISTS) is only well-defined for full paths. Anchor the log to
# CMAKE_CURRENT_SOURCE_DIR, the same base file(READ) uses for relative paths,
# so the existence check and the read always refer to the same file.
set(rust_comparison_log "${CMAKE_CURRENT_SOURCE_DIR}/logs/rust_comparison.txt")

# Check if the bug was reproduced by looking for key indicators
if(EXISTS "${rust_comparison_log}")
  file(READ "${rust_comparison_log}" rust_content)

  # The indicators are literal text, so use a plain substring search instead
  # of a regex match. This also avoids if() re-dereferencing the quoted log
  # content as a variable name when policy CMP0054 is not set (script mode).
  string(FIND "${rust_content}" "INNER_FILE" inner_file_pos)
  string(FIND "${rust_content}" "sync=3, async=4" count_mismatch_pos)

  if(NOT inner_file_pos EQUAL -1)
    file(APPEND "RESULTS.txt"
      "✅ BUG SUCCESSFULLY REPRODUCED!\n\n"
      "Evidence:\n"
      "- tokio-tar extracted 'INNER_FILE' (wrong)\n"
      "- sync tar extracted 'marker.txt' (correct)\n\n"
    )
  else()
    file(APPEND "RESULTS.txt" "❌ Bug not reproduced - libraries behaved identically\n\n")
  endif()

  # Reported independently of the INNER_FILE verdict above.
  if(NOT count_mismatch_pos EQUAL -1)
    file(APPEND "RESULTS.txt" "- Entry count mismatch detected (sync=3, async=4)\n")
  endif()
else()
  file(APPEND "RESULTS.txt" "❌ No comparison results found\n\n")
endif()

file(APPEND "RESULTS.txt" "See report.md for full technical details.\n")
message("Summary generated: RESULTS.txt")
# vim: set ts=4 sts=4 sw=4 et: