kolibrios/contrib/network/netsurf/libdom/bindings/xml/expat_xmlparser.c

590 lines
14 KiB
C
Raw Normal View History

/*
* This file is part of libdom.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2012 Daniel Silverstone <dsilvers@netsurf-browser.org>
*/
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <dom/dom.h>
#include "xmlparser.h"
#include "utils.h"
#include <expat.h>
/**
* expat XML parser object
*/
struct dom_xml_parser {
dom_msg msg; /**< Informational message function */
void *mctx; /**< Pointer to client data */
XML_Parser parser; /**< expat parser context */
struct dom_document *doc; /**< DOM Document we're building */
struct dom_node *current; /**< DOM node we're currently building */
bool is_cdata; /**< If the character data is cdata or text */
};
/* Binding functions */
static void
expat_xmlparser_start_element_handler(void *_parser,
const XML_Char *name,
const XML_Char **atts)
{
dom_xml_parser *parser = _parser;
dom_exception err;
dom_element *elem, *ins_elem;
dom_string *tag_name;
dom_string *namespace = NULL;
const XML_Char *ns_sep = strchr(name, '\n');
if (ns_sep != NULL) {
err = dom_string_create_interned((const uint8_t *)name,
ns_sep - name,
&namespace);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for namespace name");
return;
}
name = ns_sep + 1;
}
err = dom_string_create_interned((const uint8_t *)name,
strlen(name),
&tag_name);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for tag name");
if (namespace != NULL)
dom_string_unref(namespace);
return;
}
if (namespace == NULL)
err = dom_document_create_element(parser->doc,
tag_name, &elem);
else
err = dom_document_create_element_ns(parser->doc, namespace,
tag_name, &elem);
if (err != DOM_NO_ERR) {
if (namespace != NULL)
dom_string_unref(namespace);
dom_string_unref(tag_name);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Failed to create element '%s'", name);
return;
}
dom_string_unref(tag_name);
if (namespace != NULL)
dom_string_unref(namespace);
/* Add attributes to the element */
while (*atts) {
dom_string *key, *value;
ns_sep = strchr(*atts, '\n');
if (ns_sep != NULL) {
err = dom_string_create_interned((const uint8_t *)(*atts),
ns_sep - (*atts),
&namespace);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for attr namespace");
dom_node_unref(elem);
return;
}
} else
namespace = NULL;
if (ns_sep == NULL)
err = dom_string_create_interned((const uint8_t *)(*atts),
strlen(*atts), &key);
else
err = dom_string_create_interned((const uint8_t *)(ns_sep + 1),
strlen(ns_sep + 1),
&key);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for attribute name");
if (namespace != NULL)
dom_string_unref(namespace);
dom_node_unref(elem);
return;
}
atts++;
err = dom_string_create((const uint8_t *)(*atts),
strlen(*atts), &value);
if (err != DOM_NO_ERR) {
dom_node_unref(elem);
if (namespace != NULL)
dom_string_unref(namespace);
dom_string_unref(key);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for attribute value");
return;
}
atts++;
if (namespace == NULL)
err = dom_element_set_attribute(elem, key, value);
else
err = dom_element_set_attribute_ns(elem, namespace,
key, value);
if (namespace != NULL)
dom_string_unref(namespace);
dom_string_unref(key);
dom_string_unref(value);
if (err != DOM_NO_ERR) {
dom_node_unref(elem);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for setting attribute");
return;
}
}
err = dom_node_append_child(parser->current, (struct dom_node *) elem,
(struct dom_node **) (void *) &ins_elem);
if (err != DOM_NO_ERR) {
dom_node_unref(elem);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for appending child node");
return;
}
dom_node_unref(ins_elem);
dom_node_unref(parser->current);
parser->current = (struct dom_node *)elem; /* Steal initial ref */
}
static void
expat_xmlparser_end_element_handler(void *_parser,
const XML_Char *name)
{
dom_xml_parser *parser = _parser;
dom_exception err;
dom_node *parent;
UNUSED(name);
err = dom_node_get_parent_node(parser->current, &parent);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Unable to find a parent while closing element.");
return;
}
dom_node_unref(parser->current);
parser->current = parent; /* Takes the ref given by get_parent_node */
}
static void
expat_xmlparser_start_cdata_handler(void *_parser)
{
dom_xml_parser *parser = _parser;
parser->is_cdata = true;
}
static void
expat_xmlparser_end_cdata_handler(void *_parser)
{
dom_xml_parser *parser = _parser;
parser->is_cdata = false;
}
static void
expat_xmlparser_cdata_handler(void *_parser,
const XML_Char *s,
int len)
{
dom_xml_parser *parser = _parser;
dom_string *data;
dom_exception err;
struct dom_node *cdata, *ins_cdata, *lastchild = NULL;
dom_node_type ntype = 0;
err = dom_string_create((const uint8_t *)s, len, &data);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for cdata section contents");
return;
}
err = dom_node_get_last_child(parser->current, &lastchild);
if (err == DOM_NO_ERR && lastchild != NULL) {
err = dom_node_get_node_type(lastchild, &ntype);
}
if (err != DOM_NO_ERR) {
dom_string_unref(data);
if (lastchild != NULL)
dom_node_unref(lastchild);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for cdata section");
return;
}
if (ntype == DOM_TEXT_NODE && parser->is_cdata == false) {
/* We can append this text instead */
err = dom_characterdata_append_data(
(dom_characterdata *)lastchild, data);
dom_string_unref(data);
if (lastchild != NULL)
dom_node_unref(lastchild);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for cdata section");
}
return;
}
if (lastchild != NULL)
dom_node_unref(lastchild);
/* We can't append directly, so make a new node */
err = parser->is_cdata ?
dom_document_create_cdata_section(parser->doc, data,
(dom_cdata_section **) (void *) &cdata) :
dom_document_create_text_node(parser->doc, data,
(dom_text **) (void *) &cdata);
if (err != DOM_NO_ERR) {
dom_string_unref(data);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for cdata section");
return;
}
/* No longer need data */
dom_string_unref(data);
/* Append cdata section to parent */
err = dom_node_append_child(parser->current, cdata, &ins_cdata);
if (err != DOM_NO_ERR) {
dom_node_unref((struct dom_node *) cdata);
parser->msg(DOM_MSG_ERROR, parser->mctx,
"Failed attaching cdata section");
return;
}
/* We're not interested in the inserted cdata section */
if (ins_cdata != NULL)
dom_node_unref(ins_cdata);
/* No longer interested in cdata section */
dom_node_unref(cdata);
}
static int
expat_xmlparser_external_entity_ref_handler(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *system_id,
const XML_Char *public_id)
{
FILE *fh;
XML_Parser subparser;
unsigned char data[1024];
size_t len;
enum XML_Status status;
UNUSED(base);
UNUSED(public_id);
if (system_id == NULL)
return XML_STATUS_OK;
fh = fopen(system_id, "r");
if (fh == NULL)
return XML_STATUS_OK;
subparser = XML_ExternalEntityParserCreate(parser,
context,
NULL);
if (subparser == NULL) {
fclose(fh);
return XML_STATUS_OK;
}
/* Parse the file bit by bit */
while ((len = fread(data, 1, 1024, fh)) > 0) {
status = XML_Parse(subparser, (const char *)data, len, 0);
if (status != XML_STATUS_OK) {
XML_ParserFree(subparser);
fclose(fh);
return XML_STATUS_OK;
}
}
XML_Parse(subparser, "", 0, 1);
XML_ParserFree(subparser);
fclose(fh);
return XML_STATUS_OK;
}
static void
expat_xmlparser_comment_handler(void *_parser,
const XML_Char *_comment)
{
dom_xml_parser *parser = _parser;
struct dom_comment *comment, *ins_comment = NULL;
dom_string *data;
dom_exception err;
/* Create DOM string data for comment */
err = dom_string_create((const uint8_t *)_comment,
strlen((const char *) _comment), &data);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for comment data");
return;
}
/* Create comment */
err = dom_document_create_comment(parser->doc, data, &comment);
if (err != DOM_NO_ERR) {
dom_string_unref(data);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"No memory for comment node");
return;
}
/* No longer need data */
dom_string_unref(data);
/* Append comment to parent */
err = dom_node_append_child(parser->current, (struct dom_node *) comment,
(struct dom_node **) (void *) &ins_comment);
if (err != DOM_NO_ERR) {
dom_node_unref((struct dom_node *) comment);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Failed attaching comment node");
return;
}
/* We're not interested in the inserted comment */
if (ins_comment != NULL)
dom_node_unref((struct dom_node *) ins_comment);
/* No longer interested in comment */
dom_node_unref((struct dom_node *) comment);
}
static void
expat_xmlparser_start_doctype_decl_handler(void *_parser,
const XML_Char *doctype_name,
const XML_Char *system_id,
const XML_Char *public_id,
int has_internal_subset)
{
dom_xml_parser *parser = _parser;
struct dom_document_type *doctype, *ins_doctype = NULL;
dom_exception err;
UNUSED(has_internal_subset);
err = dom_implementation_create_document_type(
doctype_name, system_id ? system_id : "",
public_id ? public_id : "",
&doctype);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Failed to create document type");
return;
}
/* Add doctype to document */
err = dom_node_append_child(parser->doc, (struct dom_node *) doctype,
(struct dom_node **) (void *) &ins_doctype);
if (err != DOM_NO_ERR) {
dom_node_unref((struct dom_node *) doctype);
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Failed attaching doctype");
return;
}
/* Not interested in inserted node */
if (ins_doctype != NULL)
dom_node_unref((struct dom_node *) ins_doctype);
/* No longer interested in doctype */
dom_node_unref((struct dom_node *) doctype);
}
static void
expat_xmlparser_unknown_data_handler(void *_parser,
const XML_Char *s,
int len)
{
UNUSED(_parser);
UNUSED(s);
UNUSED(len);
}
/**
* Create an XML parser instance
*
* \param enc Source charset, or NULL
* \param int_enc Desired charset of document buffer (UTF-8 or UTF-16)
* \param msg Informational message function
* \param mctx Pointer to client-specific private data
* \return Pointer to instance, or NULL on memory exhaustion
*
* int_enc is ignored due to it being made of bees.
*/
dom_xml_parser *
dom_xml_parser_create(const char *enc, const char *int_enc,
dom_msg msg, void *mctx, dom_document **document)
{
dom_xml_parser *parser;
dom_exception err;
UNUSED(int_enc);
parser = calloc(sizeof(*parser), 1);
if (parser == NULL) {
msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
return NULL;
}
parser->msg = msg;
parser->mctx = mctx;
parser->parser = XML_ParserCreateNS(enc, '\n');
if (parser->parser == NULL) {
free(parser);
msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
return NULL;
}
parser->doc = NULL;
err = dom_implementation_create_document(
DOM_IMPLEMENTATION_XML,
/* namespace */ NULL,
/* qname */ NULL,
/* doctype */ NULL,
NULL,
NULL,
document);
if (err != DOM_NO_ERR) {
parser->msg(DOM_MSG_CRITICAL, parser->mctx,
"Failed creating document");
XML_ParserFree(parser->parser);
free(parser);
return NULL;
}
parser->doc = (dom_document *) dom_node_ref(*document);
XML_SetUserData(parser->parser, parser);
XML_SetElementHandler(parser->parser,
expat_xmlparser_start_element_handler,
expat_xmlparser_end_element_handler);
XML_SetCdataSectionHandler(parser->parser,
expat_xmlparser_start_cdata_handler,
expat_xmlparser_end_cdata_handler);
XML_SetCharacterDataHandler(parser->parser,
expat_xmlparser_cdata_handler);
XML_SetParamEntityParsing(parser->parser,
XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetExternalEntityRefHandler(parser->parser,
expat_xmlparser_external_entity_ref_handler);
XML_SetCommentHandler(parser->parser,
expat_xmlparser_comment_handler);
XML_SetStartDoctypeDeclHandler(parser->parser,
expat_xmlparser_start_doctype_decl_handler);
XML_SetDefaultHandlerExpand(parser->parser,
expat_xmlparser_unknown_data_handler);
parser->current = dom_node_ref(parser->doc);
parser->is_cdata = false;
return parser;
}
/**
* Destroy an XML parser instance
*
* \param parser The parser instance to destroy
*/
void
dom_xml_parser_destroy(dom_xml_parser *parser)
{
XML_ParserFree(parser->parser);
if (parser->current != NULL)
dom_node_unref(parser->current);
dom_node_unref(parser->doc);
free(parser);
}
/**
* Parse a chunk of data
*
* \param parser The XML parser instance to use for parsing
* \param data Pointer to data chunk
* \param len Byte length of data chunk
* \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <expat error> on failure
*/
dom_xml_error
dom_xml_parser_parse_chunk(dom_xml_parser *parser, uint8_t *data, size_t len)
{
enum XML_Status status;
status = XML_Parse(parser->parser, (const char *)data, len, 0);
if (status != XML_STATUS_OK) {
parser->msg(DOM_MSG_ERROR, parser->mctx,
"XML_Parse failed: %d", status);
return DOM_XML_EXTERNAL_ERR | status;
}
return DOM_XML_OK;
}
/**
* Notify parser that datastream is empty
*
* \param parser The XML parser instance to notify
* \return DOM_XML_OK on success, DOM_XML_EXTERNAL_ERR | <expat error> on failure
*
* This will force any remaining data through the parser
*/
dom_xml_error
dom_xml_parser_completed(dom_xml_parser *parser)
{
enum XML_Status status;
status = XML_Parse(parser->parser, "", 0, 1);
if (status != XML_STATUS_OK) {
parser->msg(DOM_MSG_ERROR, parser->mctx,
"XML_Parse failed: %d", status);
return DOM_XML_EXTERNAL_ERR | status;
}
return DOM_XML_OK;
}