776 lines
15 KiB
C
776 lines
15 KiB
C
|
/*
|
||
|
* PDF cleaning tool: general purpose pdf syntax washer.
|
||
|
*
|
||
|
* Rewrite PDF with pretty printed objects.
|
||
|
* Garbage collect unreachable objects.
|
||
|
* Inflate compressed streams.
|
||
|
* Create subset documents.
|
||
|
*
|
||
|
* TODO: linearize document for fast web view
|
||
|
*/
|
||
|
|
||
|
#include "fitz.h"
|
||
|
#include "mupdf.h"
|
||
|
|
||
|
static FILE *out = NULL;
|
||
|
|
||
|
static char *uselist = NULL;
|
||
|
static int *ofslist = NULL;
|
||
|
static int *genlist = NULL;
|
||
|
static int *renumbermap = NULL;
|
||
|
|
||
|
static int dogarbage = 0;
|
||
|
static int doexpand = 0;
|
||
|
static int doascii = 0;
|
||
|
|
||
|
static pdf_xref *xref = NULL;
|
||
|
|
||
|
void die(fz_error error)
|
||
|
{
|
||
|
fz_catch(error, "aborting");
|
||
|
if (xref)
|
||
|
pdf_free_xref(xref);
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
static void usage(void)
|
||
|
{
|
||
|
fprintf(stderr,
|
||
|
"usage: pdfclean [options] input.pdf [output.pdf] [pages]\n"
|
||
|
"\t-p -\tpassword\n"
|
||
|
"\t-g\tgarbage collect unused objects\n"
|
||
|
"\t-gg\tin addition to -g compact xref table\n"
|
||
|
"\t-ggg\tin addition to -gg merge duplicate objects\n"
|
||
|
"\t-d\tdecompress streams\n"
|
||
|
"\t-a\tascii hex encode binary streams\n"
|
||
|
"\tpages\tcomma separated list of ranges\n");
|
||
|
exit(1);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Garbage collect objects not reachable from the trailer.
|
||
|
*/
|
||
|
|
||
|
static void sweepref(fz_obj *ref);
|
||
|
|
||
|
static void sweepobj(fz_obj *obj)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
if (fz_is_indirect(obj))
|
||
|
sweepref(obj);
|
||
|
|
||
|
else if (fz_is_dict(obj))
|
||
|
for (i = 0; i < fz_dict_len(obj); i++)
|
||
|
sweepobj(fz_dict_get_val(obj, i));
|
||
|
|
||
|
else if (fz_is_array(obj))
|
||
|
for (i = 0; i < fz_array_len(obj); i++)
|
||
|
sweepobj(fz_array_get(obj, i));
|
||
|
}
|
||
|
|
||
|
static void sweepref(fz_obj *obj)
|
||
|
{
|
||
|
int num = fz_to_num(obj);
|
||
|
int gen = fz_to_gen(obj);
|
||
|
|
||
|
if (num < 0 || num >= xref->len)
|
||
|
return;
|
||
|
if (uselist[num])
|
||
|
return;
|
||
|
|
||
|
uselist[num] = 1;
|
||
|
|
||
|
/* Bake in /Length in stream objects */
|
||
|
if (pdf_is_stream(xref, num, gen))
|
||
|
{
|
||
|
fz_obj *len = fz_dict_gets(obj, "Length");
|
||
|
if (fz_is_indirect(len))
|
||
|
{
|
||
|
uselist[fz_to_num(len)] = 0;
|
||
|
len = fz_resolve_indirect(len);
|
||
|
fz_dict_puts(obj, "Length", len);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sweepobj(fz_resolve_indirect(obj));
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Scan for and remove duplicate objects (slow)
|
||
|
*/
|
||
|
|
||
|
static void removeduplicateobjs(void)
|
||
|
{
|
||
|
int num, other;
|
||
|
|
||
|
for (num = 1; num < xref->len; num++)
|
||
|
{
|
||
|
/* Only compare an object to objects preceeding it */
|
||
|
for (other = 1; other < num; other++)
|
||
|
{
|
||
|
fz_obj *a, *b;
|
||
|
|
||
|
if (num == other || !uselist[num] || !uselist[other])
|
||
|
continue;
|
||
|
|
||
|
/*
|
||
|
* Comparing stream objects data contents would take too long.
|
||
|
*
|
||
|
* pdf_is_stream calls pdf_cache_object and ensures
|
||
|
* that the xref table has the objects loaded.
|
||
|
*/
|
||
|
if (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0))
|
||
|
continue;
|
||
|
|
||
|
a = xref->table[num].obj;
|
||
|
b = xref->table[other].obj;
|
||
|
|
||
|
a = fz_resolve_indirect(a);
|
||
|
b = fz_resolve_indirect(b);
|
||
|
|
||
|
if (fz_objcmp(a, b))
|
||
|
continue;
|
||
|
|
||
|
/* Keep the lowest numbered object */
|
||
|
renumbermap[num] = MIN(num, other);
|
||
|
renumbermap[other] = MIN(num, other);
|
||
|
uselist[MAX(num, other)] = 0;
|
||
|
|
||
|
/* One duplicate was found, do not look for another */
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Renumber objects sequentially so the xref is more compact
|
||
|
*/
|
||
|
|
||
|
static void compactxref(void)
|
||
|
{
|
||
|
int num, newnum;
|
||
|
|
||
|
/*
|
||
|
* Update renumbermap in-place, clustering all used
|
||
|
* objects together at low object ids. Objects that
|
||
|
* already should be renumbered will have their new
|
||
|
* object ids be updated to reflect the compaction.
|
||
|
*/
|
||
|
|
||
|
newnum = 1;
|
||
|
for (num = 1; num < xref->len; num++)
|
||
|
{
|
||
|
if (uselist[num] && renumbermap[num] == num)
|
||
|
renumbermap[num] = newnum++;
|
||
|
else if (renumbermap[num] != num)
|
||
|
renumbermap[num] = renumbermap[renumbermap[num]];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Update indirect objects according to renumbering established when
|
||
|
* removing duplicate objects and compacting the xref.
|
||
|
*/
|
||
|
|
||
|
static void renumberobj(fz_obj *obj)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
if (fz_is_dict(obj))
|
||
|
{
|
||
|
for (i = 0; i < fz_dict_len(obj); i++)
|
||
|
{
|
||
|
fz_obj *key = fz_dict_get_key(obj, i);
|
||
|
fz_obj *val = fz_dict_get_val(obj, i);
|
||
|
if (fz_is_indirect(val))
|
||
|
{
|
||
|
val = fz_new_indirect(renumbermap[fz_to_num(val)], 0, xref);
|
||
|
fz_dict_put(obj, key, val);
|
||
|
fz_drop_obj(val);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
renumberobj(val);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
else if (fz_is_array(obj))
|
||
|
{
|
||
|
for (i = 0; i < fz_array_len(obj); i++)
|
||
|
{
|
||
|
fz_obj *val = fz_array_get(obj, i);
|
||
|
if (fz_is_indirect(val))
|
||
|
{
|
||
|
val = fz_new_indirect(renumbermap[fz_to_num(val)], 0, xref);
|
||
|
fz_array_put(obj, i, val);
|
||
|
fz_drop_obj(val);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
renumberobj(val);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void renumberobjs(void)
|
||
|
{
|
||
|
pdf_xref_entry *oldxref;
|
||
|
int newlen;
|
||
|
int num;
|
||
|
|
||
|
/* Apply renumber map to indirect references in all objects in xref */
|
||
|
renumberobj(xref->trailer);
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
fz_obj *obj = xref->table[num].obj;
|
||
|
|
||
|
if (fz_is_indirect(obj))
|
||
|
{
|
||
|
obj = fz_new_indirect(renumbermap[fz_to_num(obj)], 0, xref);
|
||
|
pdf_update_object(xref, num, 0, obj);
|
||
|
fz_drop_obj(obj);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
renumberobj(obj);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Create new table for the reordered, compacted xref */
|
||
|
oldxref = xref->table;
|
||
|
xref->table = fz_calloc(xref->len, sizeof(pdf_xref_entry));
|
||
|
xref->table[0] = oldxref[0];
|
||
|
|
||
|
/* Move used objects into the new compacted xref */
|
||
|
newlen = 0;
|
||
|
for (num = 1; num < xref->len; num++)
|
||
|
{
|
||
|
if (uselist[num])
|
||
|
{
|
||
|
if (newlen < renumbermap[num])
|
||
|
newlen = renumbermap[num];
|
||
|
xref->table[renumbermap[num]] = oldxref[num];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (oldxref[num].obj)
|
||
|
fz_drop_obj(oldxref[num].obj);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fz_free(oldxref);
|
||
|
|
||
|
/* Update the used objects count in compacted xref */
|
||
|
xref->len = newlen + 1;
|
||
|
|
||
|
/* Update list of used objects to fit with compacted xref */
|
||
|
for (num = 1; num < xref->len; num++)
|
||
|
uselist[num] = 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Recreate page tree to only retain specified pages.
|
||
|
*/
|
||
|
|
||
|
static void retainpages(int argc, char **argv)
|
||
|
{
|
||
|
fz_error error;
|
||
|
fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent;
|
||
|
|
||
|
/* Load the old page tree */
|
||
|
error = pdf_load_page_tree(xref);
|
||
|
if (error)
|
||
|
die(fz_rethrow(error, "cannot load page tree"));
|
||
|
|
||
|
/* Keep only pages/type entry to avoid references to unretained pages */
|
||
|
oldroot = fz_dict_gets(xref->trailer, "Root");
|
||
|
pages = fz_dict_gets(oldroot, "Pages");
|
||
|
|
||
|
root = fz_new_dict(2);
|
||
|
fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type"));
|
||
|
fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages"));
|
||
|
|
||
|
pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root);
|
||
|
|
||
|
fz_drop_obj(root);
|
||
|
|
||
|
/* Create a new kids array with only the pages we want to keep */
|
||
|
parent = fz_new_indirect(fz_to_num(pages), fz_to_gen(pages), xref);
|
||
|
kids = fz_new_array(1);
|
||
|
|
||
|
/* Retain pages specified */
|
||
|
while (argc - fz_optind)
|
||
|
{
|
||
|
int page, spage, epage;
|
||
|
char *spec, *dash;
|
||
|
char *pagelist = argv[fz_optind];
|
||
|
|
||
|
spec = fz_strsep(&pagelist, ",");
|
||
|
while (spec)
|
||
|
{
|
||
|
dash = strchr(spec, '-');
|
||
|
|
||
|
if (dash == spec)
|
||
|
spage = epage = pdf_count_pages(xref);
|
||
|
else
|
||
|
spage = epage = atoi(spec);
|
||
|
|
||
|
if (dash)
|
||
|
{
|
||
|
if (strlen(dash) > 1)
|
||
|
epage = atoi(dash + 1);
|
||
|
else
|
||
|
epage = pdf_count_pages(xref);
|
||
|
}
|
||
|
|
||
|
if (spage > epage)
|
||
|
page = spage, spage = epage, epage = page;
|
||
|
|
||
|
if (spage < 1)
|
||
|
spage = 1;
|
||
|
if (epage > pdf_count_pages(xref))
|
||
|
epage = pdf_count_pages(xref);
|
||
|
|
||
|
for (page = spage; page <= epage; page++)
|
||
|
{
|
||
|
fz_obj *pageobj = xref->page_objs[page-1];
|
||
|
fz_obj *pageref = xref->page_refs[page-1];
|
||
|
|
||
|
fz_dict_puts(pageobj, "Parent", parent);
|
||
|
|
||
|
/* Store page object in new kids array */
|
||
|
fz_array_push(kids, pageref);
|
||
|
}
|
||
|
|
||
|
spec = fz_strsep(&pagelist, ",");
|
||
|
}
|
||
|
|
||
|
fz_optind++;
|
||
|
}
|
||
|
|
||
|
fz_drop_obj(parent);
|
||
|
|
||
|
/* Update page count and kids array */
|
||
|
countobj = fz_new_int(fz_array_len(kids));
|
||
|
fz_dict_puts(pages, "Count", countobj);
|
||
|
fz_drop_obj(countobj);
|
||
|
fz_dict_puts(pages, "Kids", kids);
|
||
|
fz_drop_obj(kids);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Make sure we have loaded objects from object streams.
|
||
|
*/
|
||
|
|
||
|
static void preloadobjstms(void)
|
||
|
{
|
||
|
fz_error error;
|
||
|
fz_obj *obj;
|
||
|
int num;
|
||
|
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
if (xref->table[num].type == 'o')
|
||
|
{
|
||
|
error = pdf_load_object(&obj, xref, num, 0);
|
||
|
if (error)
|
||
|
die(error);
|
||
|
fz_drop_obj(obj);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Save streams and objects to the output
|
||
|
*/
|
||
|
|
||
|
static inline int isbinary(int c)
|
||
|
{
|
||
|
if (c == '\n' || c == '\r' || c == '\t')
|
||
|
return 0;
|
||
|
return c < 32 || c > 127;
|
||
|
}
|
||
|
|
||
|
static int isbinarystream(fz_buffer *buf)
|
||
|
{
|
||
|
int i;
|
||
|
for (i = 0; i < buf->len; i++)
|
||
|
if (isbinary(buf->data[i]))
|
||
|
return 1;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static fz_buffer *hexbuf(unsigned char *p, int n)
|
||
|
{
|
||
|
static const char hex[16] = "0123456789abcdef";
|
||
|
fz_buffer *buf;
|
||
|
int x = 0;
|
||
|
|
||
|
buf = fz_new_buffer(n * 2 + (n / 32) + 2);
|
||
|
|
||
|
while (n--)
|
||
|
{
|
||
|
buf->data[buf->len++] = hex[*p >> 4];
|
||
|
buf->data[buf->len++] = hex[*p & 15];
|
||
|
if (++x == 32)
|
||
|
{
|
||
|
buf->data[buf->len++] = '\n';
|
||
|
x = 0;
|
||
|
}
|
||
|
p++;
|
||
|
}
|
||
|
|
||
|
buf->data[buf->len++] = '>';
|
||
|
buf->data[buf->len++] = '\n';
|
||
|
|
||
|
return buf;
|
||
|
}
|
||
|
|
||
|
static void addhexfilter(fz_obj *dict)
|
||
|
{
|
||
|
fz_obj *f, *dp, *newf, *newdp;
|
||
|
fz_obj *ahx, *nullobj;
|
||
|
|
||
|
ahx = fz_new_name("ASCIIHexDecode");
|
||
|
nullobj = fz_new_null();
|
||
|
newf = newdp = NULL;
|
||
|
|
||
|
f = fz_dict_gets(dict, "Filter");
|
||
|
dp = fz_dict_gets(dict, "DecodeParms");
|
||
|
|
||
|
if (fz_is_name(f))
|
||
|
{
|
||
|
newf = fz_new_array(2);
|
||
|
fz_array_push(newf, ahx);
|
||
|
fz_array_push(newf, f);
|
||
|
f = newf;
|
||
|
if (fz_is_dict(dp))
|
||
|
{
|
||
|
newdp = fz_new_array(2);
|
||
|
fz_array_push(newdp, nullobj);
|
||
|
fz_array_push(newdp, dp);
|
||
|
dp = newdp;
|
||
|
}
|
||
|
}
|
||
|
else if (fz_is_array(f))
|
||
|
{
|
||
|
fz_array_insert(f, ahx);
|
||
|
if (fz_is_array(dp))
|
||
|
fz_array_insert(dp, nullobj);
|
||
|
}
|
||
|
else
|
||
|
f = ahx;
|
||
|
|
||
|
fz_dict_puts(dict, "Filter", f);
|
||
|
if (dp)
|
||
|
fz_dict_puts(dict, "DecodeParms", dp);
|
||
|
|
||
|
fz_drop_obj(ahx);
|
||
|
fz_drop_obj(nullobj);
|
||
|
if (newf)
|
||
|
fz_drop_obj(newf);
|
||
|
if (newdp)
|
||
|
fz_drop_obj(newdp);
|
||
|
}
|
||
|
|
||
|
static void copystream(fz_obj *obj, int num, int gen)
|
||
|
{
|
||
|
fz_error error;
|
||
|
fz_buffer *buf, *tmp;
|
||
|
fz_obj *newlen;
|
||
|
|
||
|
error = pdf_load_raw_stream(&buf, xref, num, gen);
|
||
|
if (error)
|
||
|
die(error);
|
||
|
|
||
|
if (doascii && isbinarystream(buf))
|
||
|
{
|
||
|
tmp = hexbuf(buf->data, buf->len);
|
||
|
fz_drop_buffer(buf);
|
||
|
buf = tmp;
|
||
|
|
||
|
addhexfilter(obj);
|
||
|
|
||
|
newlen = fz_new_int(buf->len);
|
||
|
fz_dict_puts(obj, "Length", newlen);
|
||
|
fz_drop_obj(newlen);
|
||
|
}
|
||
|
|
||
|
fprintf(out, "%d %d obj\n", num, gen);
|
||
|
fz_fprint_obj(out, obj, !doexpand);
|
||
|
fprintf(out, "stream\n");
|
||
|
fwrite(buf->data, 1, buf->len, out);
|
||
|
fprintf(out, "endstream\nendobj\n\n");
|
||
|
|
||
|
fz_drop_buffer(buf);
|
||
|
}
|
||
|
|
||
|
static void expandstream(fz_obj *obj, int num, int gen)
|
||
|
{
|
||
|
fz_error error;
|
||
|
fz_buffer *buf, *tmp;
|
||
|
fz_obj *newlen;
|
||
|
|
||
|
error = pdf_load_stream(&buf, xref, num, gen);
|
||
|
if (error)
|
||
|
die(error);
|
||
|
|
||
|
fz_dict_dels(obj, "Filter");
|
||
|
fz_dict_dels(obj, "DecodeParms");
|
||
|
|
||
|
if (doascii && isbinarystream(buf))
|
||
|
{
|
||
|
tmp = hexbuf(buf->data, buf->len);
|
||
|
fz_drop_buffer(buf);
|
||
|
buf = tmp;
|
||
|
|
||
|
addhexfilter(obj);
|
||
|
}
|
||
|
|
||
|
newlen = fz_new_int(buf->len);
|
||
|
fz_dict_puts(obj, "Length", newlen);
|
||
|
fz_drop_obj(newlen);
|
||
|
|
||
|
fprintf(out, "%d %d obj\n", num, gen);
|
||
|
fz_fprint_obj(out, obj, !doexpand);
|
||
|
fprintf(out, "stream\n");
|
||
|
fwrite(buf->data, 1, buf->len, out);
|
||
|
fprintf(out, "endstream\nendobj\n\n");
|
||
|
|
||
|
fz_drop_buffer(buf);
|
||
|
}
|
||
|
|
||
|
static void writeobject(int num, int gen)
|
||
|
{
|
||
|
fz_error error;
|
||
|
fz_obj *obj;
|
||
|
fz_obj *type;
|
||
|
|
||
|
error = pdf_load_object(&obj, xref, num, gen);
|
||
|
if (error)
|
||
|
die(error);
|
||
|
|
||
|
/* skip ObjStm and XRef objects */
|
||
|
if (fz_is_dict(obj))
|
||
|
{
|
||
|
type = fz_dict_gets(obj, "Type");
|
||
|
if (fz_is_name(type) && !strcmp(fz_to_name(type), "ObjStm"))
|
||
|
{
|
||
|
uselist[num] = 0;
|
||
|
fz_drop_obj(obj);
|
||
|
return;
|
||
|
}
|
||
|
if (fz_is_name(type) && !strcmp(fz_to_name(type), "XRef"))
|
||
|
{
|
||
|
uselist[num] = 0;
|
||
|
fz_drop_obj(obj);
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!pdf_is_stream(xref, num, gen))
|
||
|
{
|
||
|
fprintf(out, "%d %d obj\n", num, gen);
|
||
|
fz_fprint_obj(out, obj, !doexpand);
|
||
|
fprintf(out, "endobj\n\n");
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (doexpand && !pdf_is_jpx_image(obj))
|
||
|
expandstream(obj, num, gen);
|
||
|
else
|
||
|
copystream(obj, num, gen);
|
||
|
}
|
||
|
|
||
|
fz_drop_obj(obj);
|
||
|
}
|
||
|
|
||
|
static void writexref(void)
|
||
|
{
|
||
|
fz_obj *trailer;
|
||
|
fz_obj *obj;
|
||
|
int startxref;
|
||
|
int num;
|
||
|
|
||
|
startxref = ftell(out);
|
||
|
|
||
|
fprintf(out, "xref\n0 %d\n", xref->len);
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
if (uselist[num])
|
||
|
fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]);
|
||
|
else
|
||
|
fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]);
|
||
|
}
|
||
|
fprintf(out, "\n");
|
||
|
|
||
|
trailer = fz_new_dict(5);
|
||
|
|
||
|
obj = fz_new_int(xref->len);
|
||
|
fz_dict_puts(trailer, "Size", obj);
|
||
|
fz_drop_obj(obj);
|
||
|
|
||
|
obj = fz_dict_gets(xref->trailer, "Info");
|
||
|
if (obj)
|
||
|
fz_dict_puts(trailer, "Info", obj);
|
||
|
|
||
|
obj = fz_dict_gets(xref->trailer, "Root");
|
||
|
if (obj)
|
||
|
fz_dict_puts(trailer, "Root", obj);
|
||
|
|
||
|
obj = fz_dict_gets(xref->trailer, "ID");
|
||
|
if (obj)
|
||
|
fz_dict_puts(trailer, "ID", obj);
|
||
|
|
||
|
fprintf(out, "trailer\n");
|
||
|
fz_fprint_obj(out, trailer, !doexpand);
|
||
|
fprintf(out, "\n");
|
||
|
|
||
|
fz_drop_obj(trailer);
|
||
|
|
||
|
fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
|
||
|
}
|
||
|
|
||
|
static void writepdf(void)
|
||
|
{
|
||
|
int lastfree;
|
||
|
int num;
|
||
|
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
if (xref->table[num].type == 'f')
|
||
|
genlist[num] = xref->table[num].gen;
|
||
|
if (xref->table[num].type == 'n')
|
||
|
genlist[num] = xref->table[num].gen;
|
||
|
if (xref->table[num].type == 'o')
|
||
|
genlist[num] = 0;
|
||
|
|
||
|
if (dogarbage && !uselist[num])
|
||
|
continue;
|
||
|
|
||
|
if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
|
||
|
{
|
||
|
uselist[num] = 1;
|
||
|
ofslist[num] = ftell(out);
|
||
|
writeobject(num, genlist[num]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Construct linked list of free object slots */
|
||
|
lastfree = 0;
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
if (!uselist[num])
|
||
|
{
|
||
|
genlist[num]++;
|
||
|
ofslist[lastfree] = num;
|
||
|
lastfree = num;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
writexref();
|
||
|
}
|
||
|
|
||
|
int main(int argc, char **argv)
|
||
|
{
|
||
|
fz_error error;
|
||
|
char *infile;
|
||
|
char *outfile = "out.pdf";
|
||
|
char *password = "";
|
||
|
int c, num;
|
||
|
int subset;
|
||
|
|
||
|
while ((c = fz_getopt(argc, argv, "adgp:")) != -1)
|
||
|
{
|
||
|
switch (c)
|
||
|
{
|
||
|
case 'p': password = fz_optarg; break;
|
||
|
case 'g': dogarbage ++; break;
|
||
|
case 'd': doexpand ++; break;
|
||
|
case 'a': doascii ++; break;
|
||
|
default: usage(); break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (argc - fz_optind < 1)
|
||
|
usage();
|
||
|
|
||
|
infile = argv[fz_optind++];
|
||
|
|
||
|
if (argc - fz_optind > 0 &&
|
||
|
(strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")))
|
||
|
{
|
||
|
outfile = argv[fz_optind++];
|
||
|
}
|
||
|
|
||
|
subset = 0;
|
||
|
if (argc - fz_optind > 0)
|
||
|
subset = 1;
|
||
|
|
||
|
error = pdf_open_xref(&xref, infile, password);
|
||
|
if (error)
|
||
|
die(fz_rethrow(error, "cannot open input file '%s'", infile));
|
||
|
|
||
|
out = fopen(outfile, "wb");
|
||
|
if (!out)
|
||
|
die(fz_throw("cannot open output file '%s'", outfile));
|
||
|
|
||
|
fprintf(out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
|
||
|
fprintf(out, "%%\316\274\341\277\246\n\n");
|
||
|
|
||
|
uselist = fz_calloc(xref->len + 1, sizeof(char));
|
||
|
ofslist = fz_calloc(xref->len + 1, sizeof(int));
|
||
|
genlist = fz_calloc(xref->len + 1, sizeof(int));
|
||
|
renumbermap = fz_calloc(xref->len + 1, sizeof(int));
|
||
|
|
||
|
for (num = 0; num < xref->len; num++)
|
||
|
{
|
||
|
uselist[num] = 0;
|
||
|
ofslist[num] = 0;
|
||
|
genlist[num] = 0;
|
||
|
renumbermap[num] = num;
|
||
|
}
|
||
|
|
||
|
/* Make sure any objects hidden in compressed streams have been loaded */
|
||
|
preloadobjstms();
|
||
|
|
||
|
/* Only retain the specified subset of the pages */
|
||
|
if (subset)
|
||
|
retainpages(argc, argv);
|
||
|
|
||
|
/* Sweep & mark objects from the trailer */
|
||
|
if (dogarbage >= 1)
|
||
|
sweepobj(xref->trailer);
|
||
|
|
||
|
/* Coalesce and renumber duplicate objects */
|
||
|
if (dogarbage >= 3)
|
||
|
removeduplicateobjs();
|
||
|
|
||
|
/* Compact xref by renumbering and removing unused objects */
|
||
|
if (dogarbage >= 2)
|
||
|
compactxref();
|
||
|
|
||
|
/* Make renumbering affect all indirect references and update xref */
|
||
|
if (dogarbage >= 2)
|
||
|
renumberobjs();
|
||
|
|
||
|
writepdf();
|
||
|
|
||
|
if (fclose(out))
|
||
|
die(fz_throw("cannot close output file '%s'", outfile));
|
||
|
|
||
|
fz_free(uselist);
|
||
|
fz_free(ofslist);
|
||
|
fz_free(genlist);
|
||
|
fz_free(renumbermap);
|
||
|
|
||
|
pdf_free_xref(xref);
|
||
|
|
||
|
fz_flush_warnings();
|
||
|
|
||
|
return 0;
|
||
|
}
|