Yogev Ezra bb2bbc6b91 Move NetSurf to /contrib folder
git-svn-id: svn://kolibrios.org@4364 a494cfbc-eb01-0410-851d-a64ba20cac60
2013-12-15 14:01:21 +00:00

914 lines
18 KiB
C

/*
* Tree construction tester.
*/
#define _GNU_SOURCE
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <hubbub/hubbub.h>
#include <hubbub/parser.h>
#include <hubbub/tree.h>
#include "utils/utils.h"
#include "testutils.h"
/* Short names for the structures defined below, so that the definitions and
 * the prototypes that follow can refer to them without the struct keyword. */
typedef struct attr_t attr_t;
typedef struct node_t node_t;
typedef struct buf_t buf_t;
/** A single attribute stored on an element node. */
struct attr_t {
hubbub_ns ns; /**< Namespace; indexes ns_names when the attribute is printed */
char *name; /**< Attribute name; released with free() by delete_attr() */
char *value; /**< Attribute value; released with free() by delete_attr() */
};
/**
 * A node of the tree built by the tree handler callbacks in this file.
 */
struct node_t {
enum { DOCTYPE, COMMENT, ELEMENT, CHARACTER } type; /**< Selects the member of data in use */
union {
struct {
char *name;
char *public_id; /**< Left NULL by create_doctype() when the public id is missing */
char *system_id; /**< Left NULL by create_doctype() when the system id is missing */
} doctype;
struct {
hubbub_ns ns;
char *name;
attr_t *attrs; /**< Array of n_attrs attributes */
size_t n_attrs;
} element;
char *content; /**< For comments, characters **/
} data;
node_t *next; /**< Next sibling, or NULL */
node_t *prev; /**< Previous sibling, or NULL */
node_t *child; /**< First child, or NULL */
node_t *parent; /**< Parent; append_child() stores the (void *) 1 document sentinel here for top-level nodes */
uint32_t refcnt; /**< Raised by ref_node(), lowered by unref_node(); delete_node() expects 0 */
};
/** Growable text buffer filled by buf_add(). */
struct buf_t {
char *buf; /**< Heap storage; NULL until the first buf_add() call allocates it */
size_t len; /**< Number of bytes allocated at buf */
size_t pos; /**< Running total maintained by buf_add() to decide when to grow */
};
/* Number of entries in ns_names. create_element() and add_attributes() assert
 * that every namespace value they store is below this. */
#define NUM_NAMESPACES 7
/* Printable prefix for each namespace, indexed by hubbub_ns value. print_ns()
 * emits nothing for a NULL entry. */
const char * const ns_names[NUM_NAMESPACES] =
{ NULL, NULL /*html*/, "math", "svg", "xlink", "xml", "xmlns" };
/* Head of the list of top-level nodes: those appended to the document
 * sentinel (void *) 1. main() prints this list and then frees it. */
node_t *Document;
/* Forward declarations. */

/* Serialises a node, its children and its following siblings. */
static void node_print(buf_t *buf, node_t *node, unsigned depth);
/* Callbacks collected in tree_handler below; each returns a hubbub_error. */
static hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result);
static hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype,
void **result);
static hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result);
static hubbub_error create_text(void *ctx, const hubbub_string *data, void **result);
static hubbub_error ref_node(void *ctx, void *node);
static hubbub_error unref_node(void *ctx, void *node);
static hubbub_error append_child(void *ctx, void *parent, void *child, void **result);
static hubbub_error insert_before(void *ctx, void *parent, void *child, void *ref_child,
void **result);
static hubbub_error remove_child(void *ctx, void *parent, void *child, void **result);
static hubbub_error clone_node(void *ctx, void *node, bool deep, void **result);
static hubbub_error reparent_children(void *ctx, void *node, void *new_parent);
static hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result);
static hubbub_error has_children(void *ctx, void *node, bool *result);
static hubbub_error form_associate(void *ctx, void *form, void *node);
static hubbub_error add_attributes(void *ctx, void *node,
const hubbub_attribute *attributes, uint32_t n_attributes);
static hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
static hubbub_error complete_script(void *ctx, void *script);
/* Tear-down helpers for nodes and their attributes. */
static void delete_node(node_t *node);
static void delete_attr(attr_t *attr);
/*
 * Callback table registered with the parser by setup_parser() through
 * HUBBUB_PARSER_TREE_HANDLER. The initialiser is positional, so the entries
 * must stay in the member order of hubbub_tree_handler.
 * NOTE(review): the two NULL entries are presumably the encoding-change
 * callback and the client context pointer -- confirm against hubbub/tree.h.
 */
static hubbub_tree_handler tree_handler = {
create_comment,
create_doctype,
create_element,
create_text,
ref_node,
unref_node,
append_child,
insert_before,
remove_child,
clone_node,
reparent_children,
get_parent,
has_children,
form_associate,
add_attributes,
set_quirks_mode,
NULL,
complete_script,
NULL
};
/**
 * Allocation callback handed to hubbub_parser_create().
 *
 * \param ptr  Existing allocation to resize, or NULL to obtain a new one
 * \param len  Requested size in bytes
 * \param pw   Private data (ignored)
 * \return Result of malloc()/realloc(), which may be NULL
 */
static void *myrealloc(void *ptr, size_t len, void *pw)
{
	void *fresh;

	UNUSED(pw);

	/* Resizing an existing allocation is left entirely to realloc(). */
	if (ptr != NULL)
		return realloc(ptr, len);

	fresh = malloc(len);
	if (fresh == NULL)
		return NULL;

	/* A half-hearted attempt at filling new space with junk (0xdf). */
	memset(fresh, 0xdf, len);

	return fresh;
}
/*
 * Create, initialise, and return, a parser instance.
 *
 * The parser is created for UTF-8 input with myrealloc() as its allocator,
 * is given tree_handler as its tree handler, and (void *) 1 as its document
 * node. Any failure trips an assertion.
 *
 * Every library call is made outside assert() and only its outcome is
 * asserted, so the parser is still created and configured should assert()
 * ever be compiled out.
 */
static hubbub_parser *setup_parser(void)
{
	hubbub_parser *parser;
	hubbub_parser_optparams params;
	bool ok;

	ok = (hubbub_parser_create("UTF-8", false, myrealloc, NULL,
			&parser) == HUBBUB_OK);
	assert(ok && "hubbub_parser_create failed");

	params.tree_handler = &tree_handler;
	ok = (hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
			&params) == HUBBUB_OK);
	assert(ok && "setting the tree handler failed");

	/* (void *) 1 is the sentinel the callbacks recognise as the document */
	params.document_node = (void *)1;
	ok = (hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE,
			&params) == HUBBUB_OK);
	assert(ok && "setting the document node failed");

	/* Don't enable scripting -- we want the same behaviour as NetSurf.
	params.enable_scripting = true;
	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_ENABLE_SCRIPTING,
			&params) == HUBBUB_OK);
	*/

	/* Keeps builds without assertions free of an unused-variable warning */
	(void) ok;

	return parser;
}
/*** Buffer handling bits ***/

/**
 * Append a string to an output buffer, growing the buffer as required.
 *
 * \param buf  Buffer to append to, or NULL to print \a str on stdout instead
 * \param str  NUL-terminated string to append
 *
 * buf->pos is kept equal to the length of the text stored so far (excluding
 * the terminating NUL), so each append copies straight to the end of the
 * text instead of rescanning it. buf->len is the allocated size.
 *
 * Running out of memory is fatal: a message is written to stderr and the
 * process is aborted. The old storage is never overwritten by a failed
 * realloc().
 */
static void buf_add(buf_t *buf, const char *str)
{
	size_t len = strlen(str);
	size_t needed;

	if (!buf) {
		printf("%s", str);
		return;
	}

	/* An unallocated buffer holds no text, whatever the counters say */
	if (buf->buf == NULL) {
		buf->len = 0;
		buf->pos = 0;
	}

	/* Existing text, the new text, and the terminating NUL */
	needed = buf->pos + len + 1;

	if (needed > buf->len) {
		size_t new_len = (buf->len != 0) ? buf->len : 1024;
		char *grown;

		while (new_len < needed)
			new_len *= 2;

		grown = realloc(buf->buf, new_len);
		if (grown == NULL) {
			fprintf(stderr, "buf_add: out of memory\n");
			abort();
		}

		buf->buf = grown;
		buf->len = new_len;
	}

	/* Copy includes the NUL, so the buffer is always a valid C string */
	memcpy(buf->buf + buf->pos, str, len + 1);
	buf->pos += len;
}
/**
 * Parse a non-negative decimal number from the start of a header line.
 *
 * \param str  Text to parse; anything after the digits is ignored
 * \return The parsed value
 *
 * Asserts if no digits are present or the value is negative, so a malformed
 * test file cannot turn into an enormous size_t.
 */
static size_t parse_count(const char *str)
{
	char *end;
	long value = strtol(str, &end, 10);

	assert(end != str && "expected a number");
	assert(value >= 0 && "expected a non-negative number");

	return (size_t) value;
}

/**
 * Test driver: feed a chunked test file to the parser and print the tree.
 *
 * \param argc  Must be 2
 * \param argv  argv[1] names the test file
 * \return 0 on success, 1 on usage error or if the file cannot be opened
 *
 * Malformed input and parser errors trip assertions. Library calls are made
 * outside assert() and only their outcome is asserted.
 */
int main(int argc, char **argv)
{
	FILE *fp;
	char buf[4096];
	size_t *chunks;
	size_t n_chunks;
	hubbub_parser *parser;
	size_t i;
	bool ok;
	buf_t got = { NULL, 0, 0 };

	if (argc != 2) {
		printf("Usage: %s <filename>\n", argv[0]);
		return 1;
	}

	fp = fopen(argv[1], "rb");
	if (fp == NULL) {
		printf("Failed opening %s\n", argv[1]);
		return 1;
	}

	/* Format:
	 * #chunks <n>
	 * <n> lines
	 * #data
	 * <data>
	 */
	ok = (fgets(buf, sizeof(buf), fp) != NULL);
	assert(ok && "missing #chunks line");
	assert(strncmp(buf, "#chunks ", sizeof("#chunks ") - 1) == 0);
	n_chunks = parse_count(buf + sizeof("#chunks ") - 1);

	/* calloc() checks the size multiplication for overflow; at least one
	 * element is requested so that a zero count cannot yield NULL. */
	chunks = calloc(n_chunks > 0 ? n_chunks : 1, sizeof *chunks);
	assert(chunks != NULL);

	for (i = 0; i < n_chunks; i++) {
		ok = (fgets(buf, sizeof(buf), fp) != NULL);
		assert(ok && "missing chunk length line");
		chunks[i] = parse_count(buf);
	}

	ok = (fgets(buf, sizeof(buf), fp) != NULL);
	assert(ok && "missing #data line");
	assert(strcmp(buf, "#data\n") == 0);

	parser = setup_parser();

	for (i = 0; i < n_chunks; i++) {
		size_t bytes_read;

		assert(chunks[i] <= sizeof(buf));

		bytes_read = fread(buf, 1, chunks[i], fp);
		assert(bytes_read == chunks[i]);

		ok = (hubbub_parser_parse_chunk(parser, (uint8_t *) buf,
				chunks[i]) == HUBBUB_OK);
		assert(ok && "hubbub_parser_parse_chunk failed");
	}

	ok = (hubbub_parser_completed(parser) == HUBBUB_OK);
	assert(ok && "hubbub_parser_completed failed");

	node_print(&got, Document, 0);

	/* got.buf is still NULL if there was nothing to print */
	if (got.buf != NULL)
		printf("%s", got.buf);

	hubbub_parser_destroy(parser);

	/* Free the top-level nodes; delete_node() frees their descendants */
	while (Document) {
		node_t *victim = Document;
		Document = victim->next;
		delete_node(victim);
	}
	Document = NULL;

	printf("PASS\n");

	fclose(fp);
	free(got.buf);
	free(chunks);

	/* Keeps builds without assertions free of an unused-variable warning */
	(void) ok;

	return 0;
}
/*** Tree construction functions ***/

/**
 * Tree handler callback: create a comment node.
 *
 * \param ctx     Client context (unused)
 * \param data    Comment text; data->len bytes at data->ptr are copied
 * \param result  Receives the new node, whose reference count is 1
 * \return HUBBUB_OK
 *
 * Allocation failure trips an assertion rather than dereferencing NULL.
 */
hubbub_error create_comment(void *ctx, const hubbub_string *data, void **result)
{
	node_t *node = calloc(1, sizeof *node);

	UNUSED(ctx);

	assert(node != NULL);

	node->type = COMMENT;
	node->data.content = strndup((const char *) data->ptr, data->len);
	assert(node->data.content != NULL);
	node->refcnt = 1;

	*result = node;

	return HUBBUB_OK;
}
/**
 * Tree handler callback: create a doctype node.
 *
 * \param ctx      Client context (unused)
 * \param doctype  Source of the name and, unless flagged as missing, the
 *                 public and system identifiers; all are copied
 * \param result   Receives the new node, whose reference count is 1
 * \return HUBBUB_OK
 *
 * An identifier flagged as missing is left NULL in the node. Allocation
 * failure trips an assertion rather than dereferencing NULL.
 */
hubbub_error create_doctype(void *ctx, const hubbub_doctype *doctype,
		void **result)
{
	node_t *node = calloc(1, sizeof *node);

	UNUSED(ctx);

	assert(node != NULL);

	node->type = DOCTYPE;
	node->data.doctype.name = strndup(
			(const char *) doctype->name.ptr,
			doctype->name.len);
	assert(node->data.doctype.name != NULL);

	if (!doctype->public_missing) {
		node->data.doctype.public_id = strndup(
				(const char *) doctype->public_id.ptr,
				doctype->public_id.len);
		assert(node->data.doctype.public_id != NULL);
	}

	if (!doctype->system_missing) {
		node->data.doctype.system_id = strndup(
				(const char *) doctype->system_id.ptr,
				doctype->system_id.len);
		assert(node->data.doctype.system_id != NULL);
	}

	node->refcnt = 1;

	*result = node;

	return HUBBUB_OK;
}
/**
 * Tree handler callback: create an element node.
 *
 * \param ctx     Client context (unused)
 * \param tag     Source of the namespace, name and attributes; all are copied
 * \param result  Receives the new node, whose reference count is 1
 * \return HUBBUB_OK
 *
 * Namespaces outside ns_names, and allocation failures, trip assertions.
 */
hubbub_error create_element(void *ctx, const hubbub_tag *tag, void **result)
{
	node_t *node = calloc(1, sizeof *node);
	size_t i;

	UNUSED(ctx);

	assert(node != NULL);
	assert(tag->ns < NUM_NAMESPACES);

	node->type = ELEMENT;
	node->data.element.ns = tag->ns;
	node->data.element.name = strndup(
			(const char *) tag->name.ptr,
			tag->name.len);
	assert(node->data.element.name != NULL);

	node->data.element.n_attrs = tag->n_attributes;
	node->data.element.attrs = calloc(node->data.element.n_attrs,
			sizeof *node->data.element.attrs);
	/* calloc() may legitimately return NULL for a zero-length array */
	assert(node->data.element.n_attrs == 0 ||
			node->data.element.attrs != NULL);

	for (i = 0; i < tag->n_attributes; i++) {
		attr_t *attr = &node->data.element.attrs[i];

		assert(tag->attributes[i].ns < NUM_NAMESPACES);

		attr->ns = tag->attributes[i].ns;

		attr->name = strndup(
				(const char *) tag->attributes[i].name.ptr,
				tag->attributes[i].name.len);
		assert(attr->name != NULL);

		attr->value = strndup(
				(const char *) tag->attributes[i].value.ptr,
				tag->attributes[i].value.len);
		assert(attr->value != NULL);
	}

	node->refcnt = 1;

	*result = node;

	return HUBBUB_OK;
}
/**
 * Tree handler callback: create a character (text) node.
 *
 * \param ctx     Client context (unused)
 * \param data    Text; data->len bytes at data->ptr are copied
 * \param result  Receives the new node, whose reference count is 1
 * \return HUBBUB_OK
 *
 * Allocation failure trips an assertion rather than dereferencing NULL.
 */
hubbub_error create_text(void *ctx, const hubbub_string *data, void **result)
{
	node_t *node = calloc(1, sizeof *node);

	UNUSED(ctx);

	assert(node != NULL);

	node->type = CHARACTER;
	node->data.content = strndup((const char *) data->ptr, data->len);
	assert(node->data.content != NULL);
	node->refcnt = 1;

	*result = node;

	return HUBBUB_OK;
}
hubbub_error ref_node(void *ctx, void *node)
{
node_t *n = node;
UNUSED(ctx);
if (node != (void *) 1)
n->refcnt++;
return HUBBUB_OK;
}
/**
 * Tree handler callback: release a reference on a node.
 *
 * \param ctx   Client context (unused)
 * \param node  Node to release, or the document sentinel (void *) 1
 * \return HUBBUB_OK
 *
 * The sentinel is ignored. A real node is deleted once its count reaches
 * zero, but only if it has no parent; nodes still in a tree are left for
 * whoever deletes their ancestors.
 */
hubbub_error unref_node(void *ctx, void *node)
{
	node_t *n = node;

	UNUSED(ctx);

	if (n != (void *) 1) {
		assert(n->refcnt > 0);

		n->refcnt--;

		/* refcnt is a uint32_t, so it is printed with PRIu32, not %d */
		printf("Unreferencing node %p (%" PRIu32 ")\n", node, n->refcnt);

		if (n->refcnt == 0 && n->parent == NULL) {
			delete_node(n);
		}
	}

	return HUBBUB_OK;
}
/**
 * Tree handler callback: append a node to the end of a parent's children.
 *
 * \param ctx     Client context, passed on to ref_node()
 * \param parent  Parent node, or (void *) 1 to append to the Document list
 * \param child   Node to append
 * \param result  Receives the node now in the tree, with a reference taken
 * \return HUBBUB_OK
 *
 * If both \a child and the current last child are character nodes, the
 * text of \a child is appended to the existing node instead; \a child is
 * not linked in and *result is the existing node.
 *
 * The merged text is built through a temporary pointer, so a failed
 * realloc() trips an assertion instead of losing the existing text and
 * writing through NULL.
 */
hubbub_error append_child(void *ctx, void *parent, void *child, void **result)
{
	node_t *tparent = parent;
	node_t *tchild = child;
	node_t *insert = NULL;

	tchild->next = tchild->prev = NULL;

#ifndef NDEBUG
	printf("appending (%p):\n", (void *) tchild);
	node_print(NULL, tchild, 0);
	printf("to:\n");
	if (parent != (void *)1)
		node_print(NULL, tparent, 0);
#endif

	*result = child;

	/* Either become the first child, or note where the list starts */
	if (parent == (void *)1) {
		if (Document) {
			insert = Document;
		} else {
			Document = tchild;
		}
	} else {
		if (tparent->child == NULL) {
			tparent->child = tchild;
		} else {
			insert = tparent->child;
		}
	}

	if (insert) {
		/* Walk to the current last sibling */
		while (insert->next != NULL) {
			insert = insert->next;
		}

		if (tchild->type == CHARACTER && insert->type == CHARACTER) {
			size_t old_len = strlen(insert->data.content);
			size_t add_len = strlen(tchild->data.content);
			char *merged = realloc(insert->data.content,
					old_len + add_len + 1);

			assert(merged != NULL);

			/* Copy includes the terminating NUL */
			memcpy(merged + old_len, tchild->data.content,
					add_len + 1);
			insert->data.content = merged;

			*result = insert;
		} else {
			insert->next = tchild;
			tchild->prev = insert;
		}
	}

	/* Only a node that was actually linked in gets a parent */
	if (*result == child)
		tchild->parent = tparent;

	ref_node(ctx, *result);

	return HUBBUB_OK;
}
/* insert 'child' before 'ref_child', under 'parent' */
hubbub_error insert_before(void *ctx, void *parent, void *child,
void *ref_child, void **result)
{
node_t *tparent = parent;
node_t *tchild = child;
node_t *tref = ref_child;
#ifndef NDEBUG
printf("inserting (%p):\n", (void *) tchild);
node_print(NULL, tchild, 0);
printf("before:\n");
node_print(NULL, tref, 0);
printf("under:\n");
if (parent != (void *)1)
node_print(NULL, tparent, 0);
#endif
if (tchild->type == CHARACTER && tref->prev &&
tref->prev->type == CHARACTER) {
node_t *insert = tref->prev;
insert->data.content = realloc(insert->data.content,
strlen(insert->data.content) +
strlen(tchild->data.content) + 1);
strcat(insert->data.content, tchild->data.content);
*result = insert;
} else {
tchild->parent = parent;
tchild->prev = tref->prev;
tchild->next = tref;
tref->prev = tchild;
if (tchild->prev)
tchild->prev->next = tchild;
else
tparent->child = tchild;
*result = child;
}
ref_node(ctx, *result);
return HUBBUB_OK;
}
/**
 * Tree handler callback: detach a child from its parent.
 *
 * \param ctx     Client context, passed on to ref_node()
 * \param parent  Parent node; must have children and be the child's parent
 * \param child   Node to detach
 * \param result  Receives \a child, with a reference taken
 * \return HUBBUB_OK
 */
hubbub_error remove_child(void *ctx, void *parent, void *child, void **result)
{
	node_t *owner = parent;
	node_t *victim = child;
	node_t *before;
	node_t *after;

	assert(owner->child);
	assert(victim->parent == owner);

	printf("Removing child %p\n", child);

	before = victim->prev;
	after = victim->next;

	/* If the victim heads the child list, its successor takes over */
	if (victim->parent->child == victim)
		victim->parent->child = after;

	/* Close the gap between the neighbours */
	if (before != NULL)
		before->next = after;

	if (after != NULL)
		after->prev = before;

	/* The detached node keeps no links into the tree */
	victim->parent = NULL;
	victim->prev = NULL;
	victim->next = NULL;

	*result = child;

	ref_node(ctx, *result);

	return HUBBUB_OK;
}
/**
 * Tree handler callback: copy a node, optionally with its descendants.
 *
 * \param ctx     Client context, passed on to recursive calls
 * \param node    Node to copy
 * \param deep    Whether the children are copied as well
 * \param result  Receives the copy, which has no parent or siblings and a
 *                reference count of 1
 * \return HUBBUB_OK
 *
 * Children copied by a deep clone are linked under the copy with a
 * reference count of 0. Allocation failure trips an assertion rather than
 * dereferencing NULL.
 */
hubbub_error clone_node(void *ctx, void *node, bool deep, void **result)
{
	node_t *old_node = node;
	node_t *new_node = calloc(1, sizeof *new_node);
	node_t *last;
	node_t *child;
	size_t i;

	assert(new_node != NULL);

	new_node->type = old_node->type;

	switch (old_node->type) {
	case DOCTYPE:
		new_node->data.doctype.name =
				strdup(old_node->data.doctype.name);
		assert(new_node->data.doctype.name != NULL);

		if (old_node->data.doctype.public_id) {
			new_node->data.doctype.public_id =
				strdup(old_node->data.doctype.public_id);
			assert(new_node->data.doctype.public_id != NULL);
		}

		if (old_node->data.doctype.system_id) {
			new_node->data.doctype.system_id =
				strdup(old_node->data.doctype.system_id);
			assert(new_node->data.doctype.system_id != NULL);
		}
		break;
	case COMMENT:
	case CHARACTER:
		new_node->data.content = strdup(old_node->data.content);
		assert(new_node->data.content != NULL);
		break;
	case ELEMENT:
		new_node->data.element.ns = old_node->data.element.ns;
		new_node->data.element.name =
				strdup(old_node->data.element.name);
		assert(new_node->data.element.name != NULL);

		new_node->data.element.attrs =
				calloc(old_node->data.element.n_attrs,
				sizeof *new_node->data.element.attrs);
		/* calloc() may legitimately return NULL for a zero-length array */
		assert(old_node->data.element.n_attrs == 0 ||
				new_node->data.element.attrs != NULL);

		for (i = 0; i < old_node->data.element.n_attrs; i++) {
			attr_t *attr = &new_node->data.element.attrs[i];

			attr->ns = old_node->data.element.attrs[i].ns;

			attr->name =
				strdup(old_node->data.element.attrs[i].name);
			assert(attr->name != NULL);

			attr->value =
				strdup(old_node->data.element.attrs[i].value);
			assert(attr->value != NULL);
		}
		new_node->data.element.n_attrs = old_node->data.element.n_attrs;
		break;
	}

	*result = new_node;

	new_node->child = new_node->parent =
			new_node->next = new_node->prev =
			NULL;
	new_node->refcnt = 1;

	if (deep == false)
		return HUBBUB_OK;

	last = NULL;

	for (child = old_node->child; child != NULL;
			child = child->next) {
		void *copy;
		node_t *n;

		/* Receive the copy through a real void *, not a cast node_t ** */
		clone_node(ctx, child, true, &copy);
		n = copy;

		n->refcnt = 0;

		if (last == NULL) {
			new_node->child = n;
		} else {
			last->next = n;
			n->prev = last;
		}

		n->parent = new_node;
		last = n;
	}

	return HUBBUB_OK;
}
/**
 * Tree handler callback: take all of the child nodes of "node" and append
 * them to "new_parent".
 *
 * \param ctx         Client context (unused)
 * \param node        Node whose children are moved; left childless
 * \param new_parent  Node that receives the children after its own
 * \return 0 when there is nothing to move, otherwise HUBBUB_OK
 */
hubbub_error reparent_children(void *ctx, void *node, void *new_parent)
{
	node_t *dest = new_parent;
	node_t *src = node;
	node_t *first = src->child;
	node_t *tail;
	node_t *cur;

	UNUSED(ctx);

	if (first == NULL)
		return 0;

	src->child = NULL;

	if (dest->child == NULL) {
		dest->child = first;
	} else {
		/* Find the last existing child and hang the run after it */
		for (tail = dest->child; tail->next != NULL; tail = tail->next)
			;

		tail->next = first;
		first->prev = tail;
	}

	/* Every moved node now belongs to the new parent */
	for (cur = first; cur != NULL; cur = cur->next)
		cur->parent = dest;

	return HUBBUB_OK;
}
hubbub_error get_parent(void *ctx, void *node, bool element_only, void **result)
{
UNUSED(element_only);
*result = ((node_t *)node)->parent;
if (*result != NULL)
ref_node(ctx, *result);
return HUBBUB_OK;
}
hubbub_error has_children(void *ctx, void *node, bool *result)
{
UNUSED(ctx);
*result = ((node_t *)node)->child ? true : false;
return HUBBUB_OK;
}
/**
 * Tree handler callback: associate a node with a form.
 *
 * This implementation does nothing with its arguments.
 *
 * \return HUBBUB_OK
 */
hubbub_error form_associate(void *ctx, void *form, void *node)
{
	/* Nothing is recorded; the arguments are only marked as unused */
	UNUSED(node);
	UNUSED(form);
	UNUSED(ctx);

	return HUBBUB_OK;
}
/**
 * Tree handler callback: append attributes to an element node.
 *
 * \param ctx           Client context (unused)
 * \param vnode         Element node to extend
 * \param attributes    Attributes to copy onto the node
 * \param n_attributes  Number of entries in \a attributes
 * \return HUBBUB_OK
 *
 * The attribute array is grown through a temporary pointer and the count is
 * only updated once every new slot has been filled, so the node never
 * advertises attributes it does not hold. Allocation failure trips an
 * assertion. Nothing is reallocated when \a n_attributes is zero.
 */
hubbub_error add_attributes(void *ctx, void *vnode,
		const hubbub_attribute *attributes, uint32_t n_attributes)
{
	node_t *node = vnode;
	size_t old_elems = node->data.element.n_attrs;
	size_t new_elems = old_elems + n_attributes;
	attr_t *grown;
	size_t i;

	UNUSED(ctx);

	/* Avoids realloc() with a size of zero on an empty attribute list */
	if (n_attributes == 0)
		return HUBBUB_OK;

	grown = realloc(node->data.element.attrs,
			new_elems * sizeof *node->data.element.attrs);
	assert(grown != NULL);
	node->data.element.attrs = grown;

	for (i = 0; i < n_attributes; i++) {
		attr_t *attr = &node->data.element.attrs[old_elems + i];

		assert(attributes[i].ns < NUM_NAMESPACES);

		attr->ns = attributes[i].ns;

		attr->name = strndup(
				(const char *) attributes[i].name.ptr,
				attributes[i].name.len);
		assert(attr->name != NULL);

		attr->value = strndup(
				(const char *) attributes[i].value.ptr,
				attributes[i].value.len);
		assert(attr->value != NULL);
	}

	node->data.element.n_attrs = new_elems;

	return HUBBUB_OK;
}
/**
 * Tree handler callback: notification of the document's quirks mode.
 *
 * This implementation does nothing with its arguments.
 *
 * \return HUBBUB_OK
 */
hubbub_error set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
{
	/* The mode is not stored anywhere */
	UNUSED(mode);
	UNUSED(ctx);

	return HUBBUB_OK;
}
/**
 * Tree handler callback: notification concerning a script node.
 *
 * This implementation does nothing with its arguments.
 *
 * \return HUBBUB_OK
 */
hubbub_error complete_script(void *ctx, void *script)
{
	/* No script handling is performed */
	UNUSED(script);
	UNUSED(ctx);

	return HUBBUB_OK;
}
/*** Serialising bits ***/

/**
 * qsort() comparator ordering attributes by name.
 *
 * \param a  Pointer to the first attr_t
 * \param b  Pointer to the second attr_t
 * \return Result of strcmp() on the two attribute names
 */
static int compare_attrs(const void *a, const void *b) {
	const char *lhs = ((const attr_t *) a)->name;
	const char *rhs = ((const attr_t *) b)->name;

	return strcmp(lhs, rhs);
}
/**
 * Emit the line prefix for a node at the given depth.
 *
 * \param buf    Output buffer, or NULL for stdout (see buf_add())
 * \param depth  Number of indentation units to add after the "| " marker
 */
static void indent(buf_t *buf, unsigned depth)
{
	unsigned int remaining;

	buf_add(buf, "| ");

	for (remaining = depth; remaining > 0; remaining--)
		buf_add(buf, " ");
}
/**
 * Emit the prefix for a namespace, followed by a space.
 *
 * \param buf  Output buffer, or NULL for stdout (see buf_add())
 * \param ns   Namespace; used as an index into ns_names
 *
 * Namespaces whose ns_names entry is NULL produce no output.
 */
static void print_ns(buf_t *buf, hubbub_ns ns)
{
	const char *prefix = ns_names[ns];

	if (prefix == NULL)
		return;

	buf_add(buf, prefix);
	buf_add(buf, " ");
}
/**
 * Serialise a node, its descendants, and its following siblings.
 *
 * \param buf    Output buffer, or NULL for stdout (see buf_add())
 * \param node   First node to print; NULL prints nothing
 * \param depth  Indentation depth of \a node and its siblings
 *
 * Each node is printed on its own indented line, followed by its children
 * one level deeper. An element's attributes are sorted by name in place
 * and then printed one per line, one level deeper than the element.
 */
static void node_print(buf_t *buf, node_t *node, unsigned depth)
{
	node_t *cur;
	size_t i;

	/* Siblings are walked in a loop; only children recurse */
	for (cur = node; cur != NULL; cur = cur->next) {
		indent(buf, depth);

		switch (cur->type)
		{
		case DOCTYPE:
		{
			const char *public_id = cur->data.doctype.public_id;
			const char *system_id = cur->data.doctype.system_id;

			buf_add(buf, "<!DOCTYPE ");
			buf_add(buf, cur->data.doctype.name);

			/* Both ids are shown as soon as either one is present */
			if (public_id || system_id) {
				if (public_id) {
					buf_add(buf, " \"");
					buf_add(buf, public_id);
					buf_add(buf, "\" ");
				} else {
					buf_add(buf, "\"\" ");
				}

				if (system_id) {
					buf_add(buf, " \"");
					buf_add(buf, system_id);
					buf_add(buf, "\"");
				} else {
					buf_add(buf, "\"\"");
				}
			}

			buf_add(buf, ">\n");
			break;
		}
		case ELEMENT:
		{
			attr_t *attrs;
			size_t n_attrs = cur->data.element.n_attrs;

			buf_add(buf, "<");
			print_ns(buf, cur->data.element.ns);
			buf_add(buf, cur->data.element.name);
			buf_add(buf, ">\n");

			/* Sorting happens in place, on the node's own array */
			qsort(cur->data.element.attrs, n_attrs,
					sizeof *cur->data.element.attrs,
					compare_attrs);

			attrs = cur->data.element.attrs;

			for (i = 0; i < n_attrs; i++) {
				indent(buf, depth + 1);
				print_ns(buf, attrs[i].ns);
				buf_add(buf, attrs[i].name);
				buf_add(buf, "=");
				buf_add(buf, "\"");
				buf_add(buf, attrs[i].value);
				buf_add(buf, "\"\n");
			}
			break;
		}
		case CHARACTER:
			buf_add(buf, "\"");
			buf_add(buf, cur->data.content);
			buf_add(buf, "\"\n");
			break;
		case COMMENT:
			buf_add(buf, "<!-- ");
			buf_add(buf, cur->data.content);
			buf_add(buf, " -->\n");
			break;
		default:
			printf("Unexpected node type %d\n", cur->type);
			assert(0);
		}

		if (cur->child != NULL)
			node_print(buf, cur->child, depth + 1);
	}
}
/**
 * Free a node together with all of its descendants.
 *
 * \param node  Node to free; NULL is ignored
 *
 * The node's reference count must already be zero; otherwise a diagnostic
 * is printed and an assertion trips. Siblings of \a node are not touched.
 * The node is overwritten with 0xdf before being freed.
 */
static void delete_node(node_t *node)
{
	size_t i;
	node_t *c, *d;

	if (node == NULL)
		return;

	if (node->refcnt != 0) {
		/* refcnt is a uint32_t, so it is printed with PRIu32, not %d */
		printf("Node %p has non-zero refcount %" PRIu32 "\n",
				(void *) node, node->refcnt);
		assert(0);
	}

	switch (node->type) {
	case DOCTYPE:
		free(node->data.doctype.name);
		free(node->data.doctype.public_id);
		free(node->data.doctype.system_id);
		break;
	case COMMENT:
	case CHARACTER:
		free(node->data.content);
		break;
	case ELEMENT:
		free(node->data.element.name);
		for (i = 0; i < node->data.element.n_attrs; i++)
			delete_attr(&node->data.element.attrs[i]);
		free(node->data.element.attrs);
		break;
	}

	/* Fetch each child's successor before the child itself is freed */
	for (c = node->child; c != NULL; c = d) {
		d = c->next;

		delete_node(c);
	}

	memset(node, 0xdf, sizeof(node_t));

	free(node);
}
/**
 * Release the strings owned by an attribute.
 *
 * \param attr  Attribute to clear; NULL is ignored
 *
 * The attr_t itself is not freed; it is overwritten with 0xdf.
 */
static void delete_attr(attr_t *attr)
{
	if (attr != NULL) {
		free(attr->name);
		free(attr->value);

		memset(attr, 0xdf, sizeof(attr_t));
	}
}