386 lines
12 KiB
C
386 lines
12 KiB
C
|
/**
|
||
|
* @file
|
||
|
* The following is a full featured parser for configuration files using
|
||
|
* basic format "key = value".
|
||
|
*
|
||
|
* Well, it's big, but it can properly manage spaces, empty lines,
|
||
|
* single and double-quoted strings, hex numbers, comments, semicolons
|
||
|
* and more. It also happens to be much more robust than the original one.
|
||
|
*
|
||
|
* @author zamaz
|
||
|
*/
|
||
|
|
||
|
#include <stddef.h>
|
||
|
#include <assert.h>
|
||
|
#include "ckvp.h"
|
||
|
|
||
|
enum {
|
||
|
STATE_ERROR = 1,
|
||
|
STATE_BEGIN, /**< initial state */
|
||
|
STATE_COMMENT, /**< currently in a comment */
|
||
|
STATE_KEY, /**< (key) currently in a key */
|
||
|
STATE_KEYBS, /**< (key) backslash */
|
||
|
STATE_KEYBSX1, /**< (key) first character of a hex value (\\x) */
|
||
|
STATE_KEYBSX2, /**< (key) second character of a hex value (\\x) */
|
||
|
STATE_KEYSQ, /**< (key) currently in a simple quoted key */
|
||
|
STATE_KEYDQ, /**< (key) currently in a double quoted key */
|
||
|
STATE_KEYDQBS, /**< (key) backslash while in double quotes */
|
||
|
STATE_KEYDQBSX1, /**< (key) first value of \\x in double quotes */
|
||
|
STATE_KEYDQBSX2, /**< (key) second value of \\x in double quotes */
|
||
|
STATE_BEQ, /**< before '=' between key and value */
|
||
|
STATE_AEQ, /**< after '=' between key and value */
|
||
|
STATE_VALUE, /**< (value) same as (key) things above, for values */
|
||
|
STATE_VALBS, /**< (value) backslash */
|
||
|
STATE_VALBSX1, /**< (value) first character of an hex value (\\x) */
|
||
|
STATE_VALBSX2, /**< (value) second character of a hex value (\\x) */
|
||
|
STATE_VALSQ, /**< (value) currently in a simple quoted value */
|
||
|
STATE_VALDQ, /**< (value) currently in a double quoted value */
|
||
|
STATE_VALDQBS, /**< (value) backslash while in double quotes */
|
||
|
STATE_VALDQBSX1, /**< (value) first value of \\x in double quotes */
|
||
|
STATE_VALDQBSX2, /**< (value) second values of \\x in double quotes */
|
||
|
STATE_VALEND, /**< end of a value, ready to take a new key */
|
||
|
ACTION_KEY = 0x0100, /**< key complete */
|
||
|
ACTION_VALUE = 0x0200, /**< value complete */
|
||
|
ACTION_ERROR = 0x0400, /**< caught an error */
|
||
|
ACTION_STORE = 0x1000, /**< character must be stored as is */
|
||
|
ACTION_STORE_MOD = 0x2000, /**< store filtered character */
|
||
|
ACTION_STORE_HEX1 = 0x4000, /**< store first hex digit */
|
||
|
ACTION_STORE_HEX2 = 0x8000 /**< store second hex digit */
|
||
|
};
|
||
|
|
||
|
#define HEX_INDICES(st) \
|
||
|
['0'] = (st), ['1'] = (st), ['2'] = (st), ['3'] = (st), \
|
||
|
['4'] = (st), ['5'] = (st), ['6'] = (st), ['7'] = (st), \
|
||
|
['8'] = (st), ['9'] = (st), ['a'] = (st), ['b'] = (st), \
|
||
|
['c'] = (st), ['d'] = (st), ['e'] = (st), ['f'] = (st), \
|
||
|
['A'] = (st), ['B'] = (st), ['C'] = (st), ['D'] = (st), \
|
||
|
['E'] = (st), ['F'] = (st)
|
||
|
|
||
|
/**
|
||
|
* ckvp_parse() takes the current state (ckvp), a buffer in[size] and returns
|
||
|
* the number of characters processed.
|
||
|
*
|
||
|
* Each time ckvp_parse() returns, ckvp->state must be checked. If no error
|
||
|
* occured, ckvp_parse() must be called again with the remaining characters
|
||
|
* if any, otherwise the next input buffer.
|
||
|
*
|
||
|
* At the end of input, ckvp_parse() must be called with a zero size.
|
||
|
*
|
||
|
* This function doesn't allocate anything.
|
||
|
*
|
||
|
* @param[in,out] ckvp Current state.
|
||
|
* @param size Number of characters in buffer "in".
|
||
|
* @param in Input buffer to parse.
|
||
|
* @return Number of characters processed.
|
||
|
*/
|
||
|
size_t ckvp_parse(ckvp_t *ckvp, size_t size, const char in[])
|
||
|
{
|
||
|
/**
|
||
|
* State machine definition:
|
||
|
*
|
||
|
* st[current_state][current_character] = next state | action
|
||
|
*
|
||
|
* Special indices for current_character are:
|
||
|
*
|
||
|
* - 0x100 for action on characters not in the list
|
||
|
* - 0x101 for action when encountering end of input while in the
|
||
|
* current state (often ACTION_ERROR)
|
||
|
*/
|
||
|
static const unsigned int st[][0x102] = {
|
||
|
[STATE_ERROR] = {
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_BEGIN] = {
|
||
|
[' '] = STATE_BEGIN,
|
||
|
['\f'] = STATE_BEGIN,
|
||
|
['\n'] = STATE_BEGIN,
|
||
|
['\r'] = STATE_BEGIN,
|
||
|
['\t'] = STATE_BEGIN,
|
||
|
['\v'] = STATE_BEGIN,
|
||
|
[';'] = (STATE_ERROR | ACTION_ERROR),
|
||
|
['#'] = STATE_COMMENT,
|
||
|
['\''] = STATE_KEYSQ,
|
||
|
['"'] = STATE_KEYDQ,
|
||
|
['\\'] = STATE_KEYBS,
|
||
|
['='] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x100] = (STATE_KEY | ACTION_STORE),
|
||
|
[0x101] = 0
|
||
|
},
|
||
|
[STATE_COMMENT] = {
|
||
|
['\n'] = STATE_BEGIN,
|
||
|
[0x100] = STATE_COMMENT,
|
||
|
[0x101] = 0
|
||
|
},
|
||
|
[STATE_KEY] = {
|
||
|
[' '] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\f'] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\n'] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\r'] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\t'] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\v'] = (STATE_BEQ | ACTION_KEY),
|
||
|
['\''] = STATE_KEYSQ,
|
||
|
['\"'] = STATE_KEYDQ,
|
||
|
[';'] = (STATE_ERROR | ACTION_ERROR),
|
||
|
['='] = (STATE_AEQ | ACTION_KEY),
|
||
|
['#'] = (STATE_ERROR | ACTION_ERROR),
|
||
|
['\\'] = STATE_KEYBS,
|
||
|
[0x100] = (STATE_KEY | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYBS] = {
|
||
|
['f'] = (STATE_KEY | ACTION_STORE_MOD),
|
||
|
['n'] = (STATE_KEY | ACTION_STORE_MOD),
|
||
|
['r'] = (STATE_KEY | ACTION_STORE_MOD),
|
||
|
['t'] = (STATE_KEY | ACTION_STORE_MOD),
|
||
|
['v'] = (STATE_KEY | ACTION_STORE_MOD),
|
||
|
['x'] = STATE_KEYBSX1,
|
||
|
['\n'] = STATE_KEY,
|
||
|
[0x100] = (STATE_KEY | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYBSX1] = {
|
||
|
HEX_INDICES(STATE_KEYBSX2 | ACTION_STORE_HEX1),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYBSX2] = {
|
||
|
HEX_INDICES(STATE_KEY | ACTION_STORE_HEX2),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYSQ] = {
|
||
|
['\''] = STATE_KEY,
|
||
|
[0x100] = (STATE_KEYSQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYDQ] = {
|
||
|
['"'] = STATE_KEY,
|
||
|
['\\'] = STATE_KEYDQBS,
|
||
|
[0x100] = (STATE_KEYDQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYDQBS] = {
|
||
|
['f'] = (STATE_KEYDQ | ACTION_STORE_MOD),
|
||
|
['n'] = (STATE_KEYDQ | ACTION_STORE_MOD),
|
||
|
['r'] = (STATE_KEYDQ | ACTION_STORE_MOD),
|
||
|
['t'] = (STATE_KEYDQ | ACTION_STORE_MOD),
|
||
|
['v'] = (STATE_KEYDQ | ACTION_STORE_MOD),
|
||
|
['x'] = STATE_KEYDQBSX1,
|
||
|
['\n'] = STATE_KEYDQ,
|
||
|
[0x100] = (STATE_KEYDQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYDQBSX1] = {
|
||
|
HEX_INDICES(STATE_KEYDQBSX2 | ACTION_STORE_HEX1),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_KEYDQBSX2] = {
|
||
|
HEX_INDICES(STATE_KEYDQ | ACTION_STORE_HEX2),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_BEQ] = {
|
||
|
[' '] = STATE_BEQ,
|
||
|
['\f'] = STATE_BEQ,
|
||
|
['\n'] = STATE_BEQ,
|
||
|
['\r'] = STATE_BEQ,
|
||
|
['\t'] = STATE_BEQ,
|
||
|
['\v'] = STATE_BEQ,
|
||
|
['='] = STATE_AEQ,
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_AEQ] = {
|
||
|
[' '] = STATE_AEQ,
|
||
|
['\f'] = STATE_AEQ,
|
||
|
['\n'] = STATE_AEQ,
|
||
|
['\r'] = STATE_AEQ,
|
||
|
['\t'] = STATE_AEQ,
|
||
|
['\v'] = STATE_AEQ,
|
||
|
['\''] = STATE_VALSQ,
|
||
|
['\"'] = STATE_VALDQ,
|
||
|
['\\'] = STATE_VALBS,
|
||
|
['='] = (STATE_ERROR | ACTION_ERROR),
|
||
|
['#'] = (STATE_COMMENT | ACTION_VALUE),
|
||
|
[';'] = (STATE_BEGIN | ACTION_VALUE),
|
||
|
[0x100] = (STATE_VALUE | ACTION_STORE),
|
||
|
[0x101] = ACTION_VALUE
|
||
|
},
|
||
|
[STATE_VALUE] = {
|
||
|
[' '] = (STATE_VALEND | ACTION_VALUE),
|
||
|
['\f'] = (STATE_VALEND | ACTION_VALUE),
|
||
|
['\n'] = (STATE_BEGIN | ACTION_VALUE),
|
||
|
['\r'] = (STATE_VALEND | ACTION_VALUE),
|
||
|
['\t'] = (STATE_VALEND | ACTION_VALUE),
|
||
|
['\v'] = (STATE_VALEND | ACTION_VALUE),
|
||
|
['\''] = STATE_VALSQ,
|
||
|
['\"'] = STATE_VALDQ,
|
||
|
[';'] = (STATE_BEGIN | ACTION_VALUE),
|
||
|
['='] = (STATE_ERROR | ACTION_ERROR),
|
||
|
['#'] = (STATE_COMMENT | ACTION_VALUE),
|
||
|
['\\'] = STATE_VALBS,
|
||
|
[0x100] = (STATE_VALUE | ACTION_STORE),
|
||
|
[0x101] = ACTION_VALUE
|
||
|
},
|
||
|
[STATE_VALBS] = {
|
||
|
['f'] = (STATE_VALUE | ACTION_STORE_MOD),
|
||
|
['n'] = (STATE_VALUE | ACTION_STORE_MOD),
|
||
|
['r'] = (STATE_VALUE | ACTION_STORE_MOD),
|
||
|
['t'] = (STATE_VALUE | ACTION_STORE_MOD),
|
||
|
['v'] = (STATE_VALUE | ACTION_STORE_MOD),
|
||
|
['x'] = STATE_VALBSX1,
|
||
|
['\n'] = STATE_VALUE,
|
||
|
[0x100] = (STATE_VALUE | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALBSX1] = {
|
||
|
HEX_INDICES(STATE_VALBSX2 | ACTION_STORE_HEX1),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALBSX2] = {
|
||
|
HEX_INDICES(STATE_VALUE | ACTION_STORE_HEX2),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALSQ] = {
|
||
|
['\''] = STATE_VALUE,
|
||
|
[0x100] = (STATE_VALSQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALDQ] = {
|
||
|
['"'] = STATE_VALUE,
|
||
|
['\\'] = STATE_VALDQBS,
|
||
|
[0x100] = (STATE_VALDQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALDQBS] = {
|
||
|
['f'] = (STATE_VALDQ | ACTION_STORE_MOD),
|
||
|
['n'] = (STATE_VALDQ | ACTION_STORE_MOD),
|
||
|
['r'] = (STATE_VALDQ | ACTION_STORE_MOD),
|
||
|
['t'] = (STATE_VALDQ | ACTION_STORE_MOD),
|
||
|
['v'] = (STATE_VALDQ | ACTION_STORE_MOD),
|
||
|
['x'] = STATE_VALDQBSX1,
|
||
|
['\n'] = STATE_VALDQ,
|
||
|
[0x100] = (STATE_VALDQ | ACTION_STORE),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALDQBSX1] = {
|
||
|
HEX_INDICES(STATE_VALDQBSX2 | ACTION_STORE_HEX1),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALDQBSX2] = {
|
||
|
HEX_INDICES(STATE_VALDQ | ACTION_STORE_HEX2),
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = ACTION_ERROR
|
||
|
},
|
||
|
[STATE_VALEND] = {
|
||
|
[' '] = STATE_VALEND,
|
||
|
['\f'] = STATE_VALEND,
|
||
|
['\n'] = STATE_BEGIN,
|
||
|
['\r'] = STATE_VALEND,
|
||
|
['\t'] = STATE_VALEND,
|
||
|
['\v'] = STATE_VALEND,
|
||
|
[';'] = STATE_BEGIN,
|
||
|
['#'] = STATE_COMMENT,
|
||
|
[0x100] = (STATE_ERROR | ACTION_ERROR),
|
||
|
[0x101] = 0
|
||
|
}
|
||
|
};
|
||
|
static const unsigned char cv[] = {
|
||
|
['f'] = '\f', ['n'] = '\n', ['r'] = '\r',
|
||
|
['t'] = '\t', ['v'] = '\v'
|
||
|
};
|
||
|
static const unsigned char hb[] = {
|
||
|
['0'] = 0x0, ['1'] = 0x1, ['2'] = 0x2, ['3'] = 0x3,
|
||
|
['4'] = 0x4, ['5'] = 0x5, ['6'] = 0x6, ['7'] = 0x7,
|
||
|
['8'] = 0x8, ['9'] = 0x9, ['a'] = 0xa, ['b'] = 0xb,
|
||
|
['c'] = 0xc, ['d'] = 0xd, ['e'] = 0xe, ['f'] = 0xf,
|
||
|
['A'] = 0xa, ['B'] = 0xb, ['C'] = 0xc, ['D'] = 0xd,
|
||
|
['E'] = 0xe, ['F'] = 0xf
|
||
|
};
|
||
|
size_t i;
|
||
|
|
||
|
assert(sizeof(unsigned int) >= 4);
|
||
|
assert(ckvp != NULL);
|
||
|
assert(in != NULL);
|
||
|
if (ckvp->state != CKVP_NONE) {
|
||
|
ckvp->out_size = 0;
|
||
|
ckvp->state = CKVP_NONE;
|
||
|
}
|
||
|
if (ckvp->internal & 0x00010000) {
|
||
|
++(ckvp->line);
|
||
|
ckvp->column = 1;
|
||
|
}
|
||
|
else if (ckvp->internal & 0x00020000)
|
||
|
++(ckvp->column);
|
||
|
ckvp->internal &= ~(0x00030000);
|
||
|
if (size == 0) {
|
||
|
assert((ckvp->internal & 0x00ff) != 0x00);
|
||
|
assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
|
||
|
if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_ERROR)
|
||
|
ckvp->state = CKVP_ERROR;
|
||
|
else if (st[(ckvp->internal & 0x00ff)][0x101] & ACTION_VALUE)
|
||
|
ckvp->state = CKVP_OUT_VALUE;
|
||
|
return 0;
|
||
|
}
|
||
|
for (i = 0; (i < size); ++i) {
|
||
|
unsigned char c = in[i];
|
||
|
unsigned int newst;
|
||
|
|
||
|
assert((ckvp->internal & 0x00ff) != 0x00);
|
||
|
assert((ckvp->internal & 0x00ff) <= STATE_VALEND);
|
||
|
if ((newst = st[(ckvp->internal & 0x00ff)][(c & 0xff)]) == 0)
|
||
|
newst = st[(ckvp->internal & 0x00ff)][0x100];
|
||
|
ckvp->internal = ((ckvp->internal & 0xffff0000) | newst);
|
||
|
assert(newst != 0);
|
||
|
if (newst & 0x0f00) {
|
||
|
if (newst & ACTION_ERROR)
|
||
|
ckvp->state = CKVP_ERROR;
|
||
|
else if (newst & ACTION_KEY)
|
||
|
ckvp->state = CKVP_OUT_KEY;
|
||
|
else if (newst & ACTION_VALUE)
|
||
|
ckvp->state = CKVP_OUT_VALUE;
|
||
|
goto endnl;
|
||
|
}
|
||
|
if (newst & 0xf000) {
|
||
|
if (newst & ACTION_STORE_HEX1) {
|
||
|
ckvp->internal &= ~(0x00f00000);
|
||
|
ckvp->internal |= (hb[c] << 20);
|
||
|
continue;
|
||
|
}
|
||
|
else if (newst & ACTION_STORE_HEX2)
|
||
|
c = (((ckvp->internal >> 16) & 0xf0) | hb[c]);
|
||
|
else if (newst & ACTION_STORE_MOD)
|
||
|
c = cv[c];
|
||
|
if (ckvp->out_size == CKVP_OUT_SIZE) {
|
||
|
ckvp->out[0] = c;
|
||
|
ckvp->out_size = 1;
|
||
|
}
|
||
|
else
|
||
|
ckvp->out[((ckvp->out_size)++)] = c;
|
||
|
if (ckvp->out_size == CKVP_OUT_SIZE) {
|
||
|
ckvp->state = CKVP_OUT_FULL;
|
||
|
goto endnl;
|
||
|
}
|
||
|
}
|
||
|
if (c == '\n') {
|
||
|
++(ckvp->line);
|
||
|
ckvp->column = 1;
|
||
|
}
|
||
|
else
|
||
|
++(ckvp->column);
|
||
|
continue;
|
||
|
endnl:
|
||
|
if (c == '\n')
|
||
|
ckvp->internal |= 0x00010000;
|
||
|
else
|
||
|
ckvp->internal |= 0x00020000;
|
||
|
return ++i;
|
||
|
}
|
||
|
return size;
|
||
|
}
|