kolibrios/contrib/media/updf/pdf/pdf_cmap_parse.c

491 lines
9.8 KiB
C
Raw Permalink Normal View History

#include "fitz.h"
#include "mupdf.h"
/*
* CMap parser
*/
enum
{
TOK_USECMAP = PDF_NUM_TOKENS,
TOK_BEGIN_CODESPACE_RANGE,
TOK_END_CODESPACE_RANGE,
TOK_BEGIN_BF_CHAR,
TOK_END_BF_CHAR,
TOK_BEGIN_BF_RANGE,
TOK_END_BF_RANGE,
TOK_BEGIN_CID_CHAR,
TOK_END_CID_CHAR,
TOK_BEGIN_CID_RANGE,
TOK_END_CID_RANGE,
TOK_END_CMAP
};
static int
pdf_cmap_token_from_keyword(char *key)
{
if (!strcmp(key, "usecmap")) return TOK_USECMAP;
if (!strcmp(key, "begincodespacerange")) return TOK_BEGIN_CODESPACE_RANGE;
if (!strcmp(key, "endcodespacerange")) return TOK_END_CODESPACE_RANGE;
if (!strcmp(key, "beginbfchar")) return TOK_BEGIN_BF_CHAR;
if (!strcmp(key, "endbfchar")) return TOK_END_BF_CHAR;
if (!strcmp(key, "beginbfrange")) return TOK_BEGIN_BF_RANGE;
if (!strcmp(key, "endbfrange")) return TOK_END_BF_RANGE;
if (!strcmp(key, "begincidchar")) return TOK_BEGIN_CID_CHAR;
if (!strcmp(key, "endcidchar")) return TOK_END_CID_CHAR;
if (!strcmp(key, "begincidrange")) return TOK_BEGIN_CID_RANGE;
if (!strcmp(key, "endcidrange")) return TOK_END_CID_RANGE;
if (!strcmp(key, "endcmap")) return TOK_END_CMAP;
return PDF_TOK_KEYWORD;
}
static int
pdf_code_from_string(char *buf, int len)
{
int a = 0;
while (len--)
a = (a << 8) | *(unsigned char *)buf++;
return a;
}
static fz_error
pdf_lex_cmap(int *tok, fz_stream *file, char *buf, int n, int *sl)
{
fz_error error;
error = pdf_lex(tok, file, buf, n, sl);
if (error)
return fz_rethrow(error, "cannot parse cmap token");
if (*tok == PDF_TOK_KEYWORD)
*tok = pdf_cmap_token_from_keyword(buf);
return fz_okay;
}
static fz_error
pdf_parse_cmap_name(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == PDF_TOK_NAME)
fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
else
fz_warn("expected name after CMapName in cmap");
return fz_okay;
}
static fz_error
pdf_parse_wmode(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == PDF_TOK_INT)
pdf_set_wmode(cmap, atoi(buf));
else
fz_warn("expected integer after WMode in cmap");
return fz_okay;
}
static fz_error
pdf_parse_codespace_range(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
int lo, hi;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == TOK_END_CODESPACE_RANGE)
return fz_okay;
else if (tok == PDF_TOK_STRING)
{
lo = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == PDF_TOK_STRING)
{
hi = pdf_code_from_string(buf, len);
pdf_add_codespace(cmap, lo, hi, len);
}
else break;
}
else break;
}
return fz_throw("expected string or endcodespacerange");
}
static fz_error
pdf_parse_cid_range(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
int lo, hi, dst;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == TOK_END_CID_RANGE)
return fz_okay;
else if (tok != PDF_TOK_STRING)
return fz_throw("expected string or endcidrange");
lo = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok != PDF_TOK_STRING)
return fz_throw("expected string");
hi = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok != PDF_TOK_INT)
return fz_throw("expected integer");
dst = atoi(buf);
pdf_map_range_to_range(cmap, lo, hi, dst);
}
}
static fz_error
pdf_parse_cid_char(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
int src, dst;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == TOK_END_CID_CHAR)
return fz_okay;
else if (tok != PDF_TOK_STRING)
return fz_throw("expected string or endcidchar");
src = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok != PDF_TOK_INT)
return fz_throw("expected integer");
dst = atoi(buf);
pdf_map_range_to_range(cmap, src, src, dst);
}
}
static fz_error
pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
fz_error error;
char buf[256];
int tok;
int len;
int dst[256];
int i;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == PDF_TOK_CLOSE_ARRAY)
return fz_okay;
/* Note: does not handle [ /Name /Name ... ] */
else if (tok != PDF_TOK_STRING)
return fz_throw("expected string or ]");
if (len / 2)
{
for (i = 0; i < len / 2; i++)
dst[i] = pdf_code_from_string(buf + i * 2, 2);
pdf_map_one_to_many(cmap, lo, dst, len / 2);
}
lo ++;
}
}
static fz_error
pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
int lo, hi, dst;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == TOK_END_BF_RANGE)
return fz_okay;
else if (tok != PDF_TOK_STRING)
return fz_throw("expected string or endbfrange");
lo = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok != PDF_TOK_STRING)
return fz_throw("expected string");
hi = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == PDF_TOK_STRING)
{
if (len == 2)
{
dst = pdf_code_from_string(buf, len);
pdf_map_range_to_range(cmap, lo, hi, dst);
}
else
{
int dststr[256];
int i;
if (len / 2)
{
for (i = 0; i < len / 2; i++)
dststr[i] = pdf_code_from_string(buf + i * 2, 2);
while (lo <= hi)
{
dststr[i-1] ++;
pdf_map_one_to_many(cmap, lo, dststr, i);
lo ++;
}
}
}
}
else if (tok == PDF_TOK_OPEN_ARRAY)
{
error = pdf_parse_bf_range_array(cmap, file, lo, hi);
if (error)
return fz_rethrow(error, "cannot map bfrange");
}
else
{
return fz_throw("expected string or array or endbfrange");
}
}
}
static fz_error
pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file)
{
fz_error error;
char buf[256];
int tok;
int len;
int dst[256];
int src;
int i;
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
if (tok == TOK_END_BF_CHAR)
return fz_okay;
else if (tok != PDF_TOK_STRING)
return fz_throw("expected string or endbfchar");
src = pdf_code_from_string(buf, len);
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
return fz_rethrow(error, "syntaxerror in cmap");
/* Note: does not handle /dstName */
if (tok != PDF_TOK_STRING)
return fz_throw("expected string");
if (len / 2)
{
for (i = 0; i < len / 2; i++)
dst[i] = pdf_code_from_string(buf + i * 2, 2);
pdf_map_one_to_many(cmap, src, dst, i);
}
}
}
fz_error
pdf_parse_cmap(pdf_cmap **cmapp, fz_stream *file)
{
fz_error error;
pdf_cmap *cmap;
char key[64];
char buf[256];
int tok;
int len;
cmap = pdf_new_cmap();
strcpy(key, ".notdef");
while (1)
{
error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap");
goto cleanup;
}
if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
break;
else if (tok == PDF_TOK_NAME)
{
if (!strcmp(buf, "CMapName"))
{
error = pdf_parse_cmap_name(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap after CMapName");
goto cleanup;
}
}
else if (!strcmp(buf, "WMode"))
{
error = pdf_parse_wmode(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap after WMode");
goto cleanup;
}
}
else
fz_strlcpy(key, buf, sizeof key);
}
else if (tok == TOK_USECMAP)
{
fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
}
else if (tok == TOK_BEGIN_CODESPACE_RANGE)
{
error = pdf_parse_codespace_range(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap codespacerange");
goto cleanup;
}
}
else if (tok == TOK_BEGIN_BF_CHAR)
{
error = pdf_parse_bf_char(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap bfchar");
goto cleanup;
}
}
else if (tok == TOK_BEGIN_CID_CHAR)
{
error = pdf_parse_cid_char(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap cidchar");
goto cleanup;
}
}
else if (tok == TOK_BEGIN_BF_RANGE)
{
error = pdf_parse_bf_range(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap bfrange");
goto cleanup;
}
}
else if (tok == TOK_BEGIN_CID_RANGE)
{
error = pdf_parse_cid_range(cmap, file);
if (error)
{
error = fz_rethrow(error, "syntaxerror in cmap cidrange");
goto cleanup;
}
}
/* ignore everything else */
}
pdf_sort_cmap(cmap);
*cmapp = cmap;
return fz_okay;
cleanup:
pdf_drop_cmap(cmap);
return error; /* already rethrown */
}