#include <ctype.h> #include <stdio.h> #include <string.h> /* These two are for htonl / ntohl */ #include <arpa/inet.h> #include <netinet/in.h> #include <parserutils/charset/codec.h> #include "utils/utils.h" #include "testutils.h" typedef struct line_ctx { parserutils_charset_codec *codec; size_t buflen; size_t bufused; uint8_t *buf; size_t explen; size_t expused; uint8_t *exp; bool indata; bool inexp; parserutils_error exp_ret; enum { ENCODE, DECODE, BOTH } dir; } line_ctx; static bool handle_line(const char *data, size_t datalen, void *pw); static void run_test(line_ctx *ctx); static void *myrealloc(void *ptr, size_t len, void *pw) { UNUSED(pw); return realloc(ptr, len); } int main(int argc, char **argv) { parserutils_charset_codec *codec; line_ctx ctx; if (argc != 2) { printf("Usage: %s <filename>\n", argv[0]); return 1; } assert(parserutils_charset_codec_create("NATS-SEFI-ADD", myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING); assert(parserutils_charset_codec_create("UTF-16", myrealloc, NULL, &ctx.codec) == PARSERUTILS_OK); ctx.buflen = parse_filesize(argv[1]); if (ctx.buflen == 0) return 1; ctx.buf = malloc(ctx.buflen); if (ctx.buf == NULL) { printf("Failed allocating %u bytes\n", (int) ctx.buflen); return 1; } ctx.exp = malloc(ctx.buflen); if (ctx.exp == NULL) { printf("Failed allocating %u bytes\n", (int) ctx.buflen); free(ctx.buf); return 1; } ctx.explen = ctx.buflen; ctx.buf[0] = '\0'; ctx.exp[0] = '\0'; ctx.bufused = 0; ctx.expused = 0; ctx.indata = false; ctx.inexp = false; ctx.exp_ret = PARSERUTILS_OK; assert(parse_testfile(argv[1], handle_line, &ctx) == true); /* and run final test */ if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n') ctx.bufused -= 1; if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n') ctx.expused -= 1; run_test(&ctx); free(ctx.buf); parserutils_charset_codec_destroy(ctx.codec); printf("PASS\n"); return 0; } /** * Converts hex character ('0' ... '9' or 'a' ... 'f' or 'A' ... 'F') to * digit value. * \param hex Valid hex character * \return Corresponding digit value. */ static inline int hex2digit(char hex) { return (hex <= '9') ? hex - '0' : (hex | 0x20) - 'a' + 10; } bool handle_line(const char *data, size_t datalen, void *pw) { line_ctx *ctx = (line_ctx *) pw; if (data[0] == '#') { if (ctx->inexp) { /* This marks end of testcase, so run it */ if (ctx->buf[ctx->bufused - 1] == '\n') ctx->bufused -= 1; if (ctx->exp[ctx->expused - 1] == '\n') ctx->expused -= 1; run_test(ctx); ctx->buf[0] = '\0'; ctx->exp[0] = '\0'; ctx->bufused = 0; ctx->expused = 0; ctx->exp_ret = PARSERUTILS_OK; } if (strncasecmp(data+1, "data", 4) == 0) { parserutils_charset_codec_optparams params; const char *ptr = data + 6; ctx->indata = true; ctx->inexp = false; if (strncasecmp(ptr, "decode", 6) == 0) ctx->dir = DECODE; else if (strncasecmp(ptr, "encode", 6) == 0) ctx->dir = ENCODE; else ctx->dir = BOTH; ptr += 7; if (strncasecmp(ptr, "LOOSE", 5) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE; ptr += 6; } else if (strncasecmp(ptr, "STRICT", 6) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_STRICT; ptr += 7; } else { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT; ptr += 9; } assert(parserutils_charset_codec_setopt(ctx->codec, PARSERUTILS_CHARSET_CODEC_ERROR_MODE, (parserutils_charset_codec_optparams *) ¶ms) == PARSERUTILS_OK); } else if (strncasecmp(data+1, "expected", 8) == 0) { ctx->indata = false; ctx->inexp = true; ctx->exp_ret = parserutils_error_from_string(data + 10, datalen - 10 - 1 /* \n */); } else if (strncasecmp(data+1, "reset", 5) == 0) { ctx->indata = false; ctx->inexp = false; parserutils_charset_codec_reset(ctx->codec); } } else { if (ctx->indata) { /* Process "&#xNNNN" as 16-bit code units. */ while (datalen) { uint16_t nCodePoint; if (data[0] == '\n') { ctx->buf[ctx->bufused++] = *data++; --datalen; continue; } assert(datalen >= sizeof ("&#xNNNN")-1 \ && data[0] == '&' && data[1] == '#' \ && data[2] == 'x' && isxdigit(data[3]) \ && isxdigit(data[4]) && isxdigit(data[5]) \ && isxdigit(data[6])); /* UTF-16 code is always host endian (different than UCS-32 !). */ nCodePoint = (hex2digit(data[3]) << 12) | (hex2digit(data[4]) << 8) | (hex2digit(data[5]) << 4) | hex2digit(data[6]); *((uint16_t *) (void *) (ctx->buf + ctx->bufused)) = nCodePoint; ctx->bufused += 2; data += sizeof ("&#xNNNN")-1; datalen -= sizeof ("&#xNNNN")-1; } } if (ctx->inexp) { /* Process "&#xXXXXYYYY as 32-bit code units. */ while (datalen) { uint32_t nCodePoint; if (data[0] == '\n') { ctx->exp[ctx->expused++] = *data++; --datalen; continue; } assert(datalen >= sizeof ("&#xXXXXYYYY")-1 \ && data[0] == '&' && data[1] == '#' \ && data[2] == 'x' && isxdigit(data[3]) \ && isxdigit(data[4]) && isxdigit(data[5]) \ && isxdigit(data[6]) && isxdigit(data[7]) \ && isxdigit(data[8]) && isxdigit(data[9]) \ && isxdigit(data[10])); /* UCS-4 code is always big endian, so convert host endian to big endian. */ nCodePoint = htonl((hex2digit(data[3]) << 28) | (hex2digit(data[4]) << 24) | (hex2digit(data[5]) << 20) | (hex2digit(data[6]) << 16) | (hex2digit(data[7]) << 12) | (hex2digit(data[8]) << 8) | (hex2digit(data[9]) << 4) | hex2digit(data[10])); *((uint32_t *) (void *) (ctx->exp + ctx->expused)) = nCodePoint; ctx->expused += 4; data += sizeof ("&#xXXXXYYYY")-1; datalen -= sizeof ("&#xXXXXYYYY")-1; } } } return true; } void run_test(line_ctx *ctx) { static int testnum; size_t destlen = ctx->bufused * 4; uint8_t *dest = alloca(destlen); uint8_t *pdest = dest; const uint8_t *psrc = ctx->buf; size_t srclen = ctx->bufused; size_t i; if (ctx->dir == DECODE) { assert(parserutils_charset_codec_decode(ctx->codec, &psrc, &srclen, &pdest, &destlen) == ctx->exp_ret); } else if (ctx->dir == ENCODE) { assert(parserutils_charset_codec_encode(ctx->codec, &psrc, &srclen, &pdest, &destlen) == ctx->exp_ret); } else { size_t templen = ctx->bufused * 4; uint8_t *temp = alloca(templen); uint8_t *ptemp = temp; const uint8_t *ptemp2; size_t templen2; assert(parserutils_charset_codec_decode(ctx->codec, &psrc, &srclen, &ptemp, &templen) == ctx->exp_ret); /* \todo currently there is no way to specify the number of consumed & produced data in case of a deliberate bad input data set. */ if (ctx->exp_ret == PARSERUTILS_OK) { assert(temp + (ctx->bufused * 4 - templen) == ptemp); } ptemp2 = temp; templen2 = ctx->bufused * 4 - templen; assert(parserutils_charset_codec_encode(ctx->codec, &ptemp2, &templen2, &pdest, &destlen) == ctx->exp_ret); if (ctx->exp_ret == PARSERUTILS_OK) { assert(templen2 == 0); assert(temp + (ctx->bufused * 4 - templen) == ptemp2); } } if (ctx->exp_ret == PARSERUTILS_OK) { assert(srclen == 0); assert(ctx->buf + ctx->bufused == psrc); assert(dest + (ctx->bufused * 4 - destlen) == pdest); assert(ctx->bufused * 4 - destlen == ctx->expused); } printf("%d: Read '", ++testnum); for (i = 0; i < ctx->expused; i++) { printf("%c%c ", "0123456789abcdef"[(dest[i] >> 4) & 0xf], "0123456789abcdef"[dest[i] & 0xf]); } printf("' Expected '"); for (i = 0; i < ctx->expused; i++) { printf("%c%c ", "0123456789abcdef"[(ctx->exp[i] >> 4) & 0xf], "0123456789abcdef"[ctx->exp[i] & 0xf]); } printf("'\n"); assert(pdest == dest + ctx->expused); assert(memcmp(dest, ctx->exp, ctx->expused) == 0); }