Upload GNU unRTF

git-svn-id: svn://kolibrios.org@8335 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
maxcodehack 2020-12-07 09:06:07 +00:00
parent d644c0e2c8
commit 60c3243924
35 changed files with 8579 additions and 0 deletions

31
programs/media/unrtf/Makefile Executable file
View File

@ -0,0 +1,31 @@
# Builds UnRTF for KolibriOS with the kos32 cross toolchain.
CC = kos32-gcc
LD = kos32-ld
SDK_DIR = $(abspath ../../../contrib/sdk)
# -c: compile only; the -U options undefine Windows-specific macros.
CFLAGS = -c -fno-ident -O2 -fomit-frame-pointer -U__WIN32__ -U_Win32 -U_WIN32 -U__MINGW32__ -UWIN32
LDFLAGS = -static -S -nostdlib -T $(SDK_DIR)/sources/newlib/app.lds --image-base 0
INCLUDES = -I $(SDK_DIR)/sources/newlib/libc/include
# NOTE(review): the second search path is an absolute path on the autobuild
# host; confirm it is still required when building elsewhere.
LIBPATH = -L $(SDK_DIR)/lib -L /home/autobuild/tools/win32/mingw32/lib
# Only selected
SRC = convert.c word.c error.c main.c hash.c \
parse.c malloc.c attr.c util.c \
output.c html.c text.c vt.c ps.c latex.c wpml.c
# All .c files
# SRC = $(notdir $(wildcard *.c))
OBJECTS = $(patsubst %.c, %.o, $(SRC))

# Neither target names a file it creates, so never treat them as up to date.
.PHONY: default clean

# Link all objects into "unrtf", then convert it in place to a raw binary.
default: $(OBJECTS)
	$(LD) $(LDFLAGS) $(LIBPATH) --subsystem console -o unrtf $(OBJECTS) -lgcc -lc.dll
	objcopy unrtf -O binary

# Every object depends on the Makefile and on all sources, so changing any
# of them rebuilds everything.
%.o : %.c Makefile $(SRC)
	$(CC) $(CFLAGS) $(INCLUDES) -o $@ $<

# -f: do not fail when there is nothing to remove.
clean:
	rm -f *.o

620
programs/media/unrtf/attr.c Executable file
View File

@ -0,0 +1,620 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: attr
* Author name: Zach Smith
* Create date: 01 Aug 01
* Purpose: Character attribute stack.
*----------------------------------------------------------------------
* Changes:
* 01 Aug 01, tuorfa@yahoo.com: moved code over from convert.c
* 06 Aug 01, tuorfa@yahoo.com: added several font attributes.
* 18 Sep 01, tuorfa@yahoo.com: added AttrStack (stack of stacks) paradigm
* 22 Sep 01, tuorfa@yahoo.com: added comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "attr.h"
#include "main.h"
extern void starting_body();
extern void starting_text();
extern int simulate_allcaps;
extern int simulate_smallcaps;
/* Number of attribute slots available in each AttrStack. */
#define MAX_ATTRS (1000)
/* For each RTF text block (the text within braces) we must keep
* an AttrStack which is a stack of attributes and their optional
* parameter. Since RTF text blocks are nested, these make up a
* stack of stacks. And, since RTF text blocks inherit attributes
* from parent blocks, all new AttrStacks do the same from
* their parent AttrStack.
*/
typedef struct _stack {
/* Attribute numbers (ATTR_* values); slot 0 is the bottom of the stack. */
unsigned char attr_stack [MAX_ATTRS];
/* Parameter string for the same slot (a my_strdup copy), or NULL. */
char *attr_stack_params [MAX_ATTRS];
/* Index of the topmost used slot; -1 means the stack is empty. */
int tos;
/* Next stack in the list (the one pushed after this one), or NULL. */
struct _stack *next;
}
AttrStack;
/* First stack pushed: head of the singly linked list of AttrStacks. */
static AttrStack *stack_of_stacks = NULL;
/* Most recently pushed stack: tail of the list, the "current" stack. */
static AttrStack *stack_of_stacks_top = NULL;
/*========================================================================
* Name: attr_express_begin
* Purpose: Print the HTML for beginning an attribute.
* Args: Attribute number, optional string parameter.
* Returns: None.
*=======================================================================*/
void
attr_express_begin (int attr, char* param) {
switch(attr)
{
case ATTR_BOLD:
printf (op->bold_begin);
break;
case ATTR_ITALIC:
printf (op->italic_begin);
break;
/* Various underlines, they all resolve to HTML's <u> */
case ATTR_THICK_UL:
case ATTR_WAVE_UL:
case ATTR_DASH_UL:
case ATTR_DOT_UL:
case ATTR_DOT_DASH_UL:
case ATTR_2DOT_DASH_UL:
case ATTR_WORD_UL:
case ATTR_UNDERLINE:
printf (op->underline_begin);
break;
case ATTR_DOUBLE_UL:
printf (op->dbl_underline_begin);
break;
case ATTR_FONTSIZE:
op_begin_std_fontsize (op, atoi (param));
break;
case ATTR_FONTFACE:
printf (op->font_begin,param);
break;
case ATTR_FOREGROUND:
printf (op->foreground_begin, param);
break;
case ATTR_BACKGROUND:
if (!simple_mode)
printf (op->foreground_begin,param);
break;
case ATTR_SUPER:
printf (op->superscript_begin);
break;
case ATTR_SUB:
printf (op->subscript_begin);
break;
case ATTR_STRIKE:
printf (op->strikethru_begin);
break;
case ATTR_DBL_STRIKE:
printf (op->dbl_strikethru_begin);
break;
case ATTR_EXPAND:
printf (op->expand_begin, param);
break;
case ATTR_OUTLINE:
printf (op->outline_begin);
break;
case ATTR_SHADOW:
printf (op->shadow_begin);
break;
case ATTR_EMBOSS:
printf (op->emboss_begin);
break;
case ATTR_ENGRAVE:
printf (op->engrave_begin);
break;
case ATTR_CAPS:
if (op->simulate_all_caps)
simulate_allcaps = TRUE;
break;
case ATTR_SMALLCAPS:
if (op->simulate_small_caps)
simulate_smallcaps = TRUE;
else {
if (op->small_caps_begin)
printf (op->small_caps_begin);
}
break;
}
}
/*========================================================================
 * Name:	attr_express_end
 * Purpose:	Prints the output-format markup that turns an attribute off.
 * Args:	Attribute number (ATTR_*), and the string parameter the
 *		attribute was begun with (only ATTR_FONTSIZE reads it).
 * Returns:	None.
 * Notes:	The markup strings are read from the global `op' structure
 *		and are used as printf format strings.  A NULL markup string
 *		is skipped rather than handed to printf.  ATTR_CAPS and
 *		ATTR_SMALLCAPS may clear the simulate_* flags instead of
 *		printing.  Attribute numbers not listed here are ignored.
 *=======================================================================*/
void
attr_express_end (int attr, char *param)
{
    const char *closing = NULL;   /* markup to print, if any */

    switch (attr)
    {
    case ATTR_BOLD:
        closing = op->bold_end;
        break;
    case ATTR_ITALIC:
        closing = op->italic_end;
        break;

    /* Various underlines, they all resolve to HTML's </u> */
    case ATTR_THICK_UL:
    case ATTR_WAVE_UL:
    case ATTR_DASH_UL:
    case ATTR_DOT_UL:
    case ATTR_DOT_DASH_UL:
    case ATTR_2DOT_DASH_UL:
    case ATTR_WORD_UL:
    case ATTR_UNDERLINE:
        closing = op->underline_end;
        break;
    case ATTR_DOUBLE_UL:
        closing = op->dbl_underline_end;
        break;

    case ATTR_FONTSIZE:
        /* atoi (NULL) is undefined; without a size there is nothing to close. */
        if (param)
            op_end_std_fontsize (op, atoi (param));
        break;
    case ATTR_FONTFACE:
        closing = op->font_end;
        break;
    case ATTR_FOREGROUND:
        closing = op->foreground_end;
        break;
    case ATTR_BACKGROUND:
        if (!simple_mode)
            closing = op->background_end;
        break;

    case ATTR_SUPER:
        closing = op->superscript_end;
        break;
    case ATTR_SUB:
        closing = op->subscript_end;
        break;
    case ATTR_STRIKE:
        closing = op->strikethru_end;
        break;
    case ATTR_DBL_STRIKE:
        closing = op->dbl_strikethru_end;
        break;
    case ATTR_OUTLINE:
        closing = op->outline_end;
        break;
    case ATTR_SHADOW:
        closing = op->shadow_end;
        break;
    case ATTR_EMBOSS:
        closing = op->emboss_end;
        break;
    case ATTR_ENGRAVE:
        closing = op->engrave_end;
        break;
    case ATTR_EXPAND:
        closing = op->expand_end;
        break;

    case ATTR_CAPS:
        if (op->simulate_all_caps)
            simulate_allcaps = FALSE;
        break;
    case ATTR_SMALLCAPS:
        if (op->simulate_small_caps)
            simulate_smallcaps = FALSE;
        else
            closing = op->small_caps_end;
        break;

    default:
        break;
    }

    if (closing)
        printf (closing);
}
/*========================================================================
 * Name:	attr_push
 * Purpose:	Pushes an attribute onto the current attribute stack and
 *		prints its opening markup.
 * Args:	Attribute number, optional string parameter (copied with
 *		my_strdup; the caller keeps ownership of its own string).
 * Returns:	None.
 * Notes:	Warns and does nothing when no AttrStack exists.  Prints
 *		"Too many attributes!" to stderr and does nothing when the
 *		current stack is full.
 *=======================================================================*/
void
attr_push(int attr, char* param)
{
    AttrStack *stack = stack_of_stacks_top;

    if (!stack) {
        warning_handler ("no stack to push attribute onto");
        return;
    }

    /* tos is the index of the last used slot and is incremented before
     * the store below, so the last legal value here is MAX_ATTRS-2.
     * Testing against MAX_ATTRS let one push write past both arrays. */
    if (stack->tos >= MAX_ATTRS - 1) {
        fprintf (stderr,"Too many attributes!\n");
        return;
    }

    /* Make sure it's understood we're in the <body> section. */
    /* KLUDGE */
    starting_body();
    starting_text();

    ++stack->tos;
    stack->attr_stack [stack->tos] = attr;
    if (param)
        stack->attr_stack_params [stack->tos] = my_strdup(param);
    else
        stack->attr_stack_params [stack->tos] = NULL;

    attr_express_begin (attr, param);
}
/*========================================================================
 * Name:	attrstack_copy_all
 * Purpose:	Duplicates every attribute slot of one stack into another.
 * Args:	Source stack, destination stack (both must be non-NULL).
 * Returns:	None.
 * Notes:	Parameter strings are duplicated with my_strdup, so the
 *		destination owns its own copies.  Slots 0..src->tos of the
 *		destination are overwritten and its tos is set to src->tos.
 *=======================================================================*/
void
attrstack_copy_all (AttrStack *src, AttrStack *dest)
{
    int slot;

    CHECK_PARAM_NOT_NULL(src);
    CHECK_PARAM_NOT_NULL(dest);

    for (slot = 0; slot <= src->tos; slot++)
    {
        char *src_param = src->attr_stack_params [slot];

        dest->attr_stack [slot] = src->attr_stack [slot];
        dest->attr_stack_params [slot] = src_param ? my_strdup (src_param) : NULL;
    }

    dest->tos = src->tos;
}
/*========================================================================
 * Name:	attrstack_unexpress_all
 * Purpose:	Prints the closing markup for every attribute on a stack,
 *		from the top slot down to slot 0, leaving the stack itself
 *		untouched.
 * Args:	Stack whose contents should be unexpressed (non-NULL).
 * Returns:	None.
 * Notes:	This is needed by attrstack_push, but also for \cell, which
 *		often occurs within a brace group, yet HTML uses <td></td>
 *		which clear attribute info within that block.
 *=======================================================================*/
void
attrstack_unexpress_all (AttrStack *stack)
{
    int slot;

    CHECK_PARAM_NOT_NULL(stack);

    for (slot = stack->tos; slot >= 0; slot--)
        attr_express_end (stack->attr_stack [slot],
                          stack->attr_stack_params [slot]);
}
/*========================================================================
 * Name:	attrstack_push
 * Purpose:	Creates a new attribute stack, appends it to the stack
 *		of stacks, and makes it inherit the previous stack's
 *		attributes.
 * Args:	None.
 * Returns:	None.
 * Notes:	Calls error_handler ("out of memory") when the allocation
 *		fails.  When a previous stack exists its attributes are
 *		closed, copied into the new stack, and then re-opened from
 *		the new stack.
 *=======================================================================*/
void
attrstack_push ()
{
    AttrStack *new_stack;
    AttrStack *prev_stack;

    new_stack = (AttrStack*) my_malloc (sizeof (AttrStack));
    if (!new_stack)
        error_handler ("out of memory");
    /* memset comes from <string.h>, which this file already includes;
     * bzero would need <strings.h>. */
    memset ((void*) new_stack, 0, sizeof (AttrStack));

    prev_stack = stack_of_stacks_top;

    /* Append to the list: the first stack ever pushed becomes the head. */
    if (!stack_of_stacks) {
        stack_of_stacks = new_stack;
    } else {
        stack_of_stacks_top->next = new_stack;
    }
    stack_of_stacks_top = new_stack;
    new_stack->tos = -1;    /* empty */

    if (prev_stack) {
        attrstack_unexpress_all (prev_stack);
        attrstack_copy_all (prev_stack, new_stack);
        attrstack_express_all ();
    }
}
/*========================================================================
 * Name:	attr_pop
 * Purpose:	Removes the top attribute of the current AttrStack and
 *		prints its closing markup.
 * Args:	The attribute number expected on top, for verification.
 * Returns:	TRUE when the top attribute matched and was removed;
 *		FALSE when there is no stack (a warning is issued), the
 *		stack is empty, or the top attribute is a different one.
 *=======================================================================*/
int
attr_pop (int attr)
{
    AttrStack *top_stack = stack_of_stacks_top;
    char *top_param;

    if (top_stack == NULL) {
        warning_handler ("no stack to pop attribute from");
        return FALSE;
    }

    /* Only pop when the stack is non-empty and its top entry is `attr'. */
    if (top_stack->tos < 0 || top_stack->attr_stack [top_stack->tos] != attr)
        return FALSE;

    top_param = top_stack->attr_stack_params [top_stack->tos];
    attr_express_end (attr, top_param);
    if (top_param)
        my_free (top_param);
    top_stack->tos--;

    return TRUE;
}
/*========================================================================
 * Name:	attr_read
 * Purpose:	Reports the top attribute of the current AttrStack without
 *		removing it.
 * Args:	None.
 * Returns:	Attribute number; ATTR_NONE when the stack is empty;
 *		FALSE (after a warning) when there is no stack at all.
 *=======================================================================*/
int
attr_read()
{
    AttrStack *top_stack = stack_of_stacks_top;

    if (top_stack == NULL) {
        warning_handler ("no stack to read attribute from");
        return FALSE;
    }

    if (top_stack->tos < 0)
        return ATTR_NONE;

    return top_stack->attr_stack [top_stack->tos];
}
/*========================================================================
 * Name:	attr_drop_all
 * Purpose:	Empties the current AttrStack, freeing the parameter
 *		strings.  No closing markup is printed.
 * Args:	None.
 * Returns:	None.
 * Notes:	Issues a warning and does nothing when there is no stack.
 *=======================================================================*/
void
attr_drop_all ()
{
    AttrStack *top_stack = stack_of_stacks_top;

    if (top_stack == NULL) {
        warning_handler ("no stack to drop all attributes from");
        return;
    }

    for (; top_stack->tos >= 0; top_stack->tos--)
    {
        char *owned_param = top_stack->attr_stack_params [top_stack->tos];

        if (owned_param)
            my_free (owned_param);
    }
}
/*========================================================================
 * Name:	attrstack_drop
 * Purpose:	Removes the top AttrStack from the stack of stacks, undoing
 *		all attributes that it had in it, then re-opens the
 *		attributes of the stack beneath it (if any).
 * Args:	None.
 * Returns:	None.
 * Notes:	Issues a warning and does nothing when there is no stack.
 *		When the dropped stack was the only one, both list pointers
 *		are reset to NULL and nothing is re-expressed.
 *=======================================================================*/
void
attrstack_drop ()
{
    AttrStack *stack = stack_of_stacks_top;
    AttrStack *prev_stack;

    if (!stack) {
        warning_handler ("no attr-stack to drop");
        return;
    }

    attr_pop_all ();

    /* Locate the predecessor of the stack being dropped.  The head of the
     * list has none: walking from the head would "find" the head itself,
     * which left both list pointers aimed at memory freed below. */
    if (stack == stack_of_stacks) {
        prev_stack = NULL;
    } else {
        prev_stack = stack_of_stacks;
        while (prev_stack && prev_stack->next && prev_stack->next != stack)
            prev_stack = prev_stack->next;
    }

    if (prev_stack) {
        stack_of_stacks_top = prev_stack;
        prev_stack->next = NULL;
    } else {
        stack_of_stacks_top = NULL;
        stack_of_stacks = NULL;
    }

    my_free ((void*) stack);

    /* Re-open the parent's attributes; nothing to do when no stack is left. */
    if (stack_of_stacks_top)
        attrstack_express_all ();
}
/*========================================================================
 * Name:	attr_pop_all
 * Purpose:	Empties the current AttrStack from the top down, printing
 *		the closing markup of each attribute and freeing its
 *		parameter string.
 * Args:	None.
 * Returns:	None.
 * Notes:	Issues a warning and does nothing when there is no stack.
 *=======================================================================*/
void
attr_pop_all()
{
    AttrStack *top_stack = stack_of_stacks_top;

    if (top_stack == NULL) {
        warning_handler ("no stack to pop from");
        return;
    }

    for (; top_stack->tos >= 0; top_stack->tos--)
    {
        int   top_attr  = top_stack->attr_stack [top_stack->tos];
        char *top_param = top_stack->attr_stack_params [top_stack->tos];

        attr_express_end (top_attr, top_param);
        if (top_param)
            my_free (top_param);
    }
}
/*========================================================================
 * Name:	attrstack_express_all
 * Purpose:	Prints the opening markup for every attribute on the
 *		current AttrStack, from slot 0 up to the top slot.
 * Args:	None.
 * Returns:	None.
 * Notes:	This is needed by attrstack_push, but also for \cell, which
 *		often occurs within a brace group, yet HTML uses <td></td>
 *		which clear attribute info within that block.
 *		Issues a warning and does nothing when there is no stack.
 *=======================================================================*/
void
attrstack_express_all()
{
    AttrStack *stack = stack_of_stacks_top;
    int i;

    if (!stack) {
        /* The text used to read "no stack to pop from", copied from
         * attr_pop_all, which misreported where the warning came from. */
        warning_handler ("no stack to express attributes from");
        return;
    }

    for (i = 0; i <= stack->tos; i++)
        attr_express_begin (stack->attr_stack [i],
                            stack->attr_stack_params [i]);
}
/*========================================================================
 * Name:	attr_pop_dump
 * Purpose:	Pops every attribute off the current AttrStack, one
 *		attr_pop call per slot, starting at the top.
 * Args:	None.
 * Returns:	None.
 * Notes:	This is needed for \cell, which often occurs within a
 *		brace group, yet HTML uses <td></td> which clear attribute
 *		info within that block.
 *		Silently does nothing when there is no stack.
 *=======================================================================*/
void
attr_pop_dump()
{
    AttrStack *top_stack = stack_of_stacks_top;
    int slot;

    if (top_stack == NULL)
        return;

    for (slot = top_stack->tos; slot >= 0; slot--)
        attr_pop (top_stack->attr_stack [slot]);
}

88
programs/media/unrtf/attr.h Executable file
View File

@ -0,0 +1,88 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: attr
* Author name: Zach Smith
* Create date: 1 Aug 2001
* Purpose: Definitions for attribute stack module.
*----------------------------------------------------------------------
* Changes:
* 01 Aug 01, tuorfa@yahoo.com: moved code over from convert.c
* 06 Aug 01, tuorfa@yahoo.com: added several attributes
* 18 Sep 01, tuorfa@yahoo.com: updates for AttrStack paradigm
*--------------------------------------------------------------------*/
/* Include guard, in the same style as convert.h: without it a second
 * inclusion would redefine the enumeration constants below. */
#ifndef _ATTR
#define _ATTR

/* Attribute numbers stored on the attribute stacks.  ATTR_NONE is what
 * attr_read() reports for an empty stack. */
enum {
    ATTR_NONE=0,
    ATTR_BOLD, ATTR_ITALIC,
    ATTR_UNDERLINE, ATTR_DOUBLE_UL, ATTR_WORD_UL,
    ATTR_THICK_UL, ATTR_WAVE_UL,
    ATTR_DOT_UL, ATTR_DASH_UL, ATTR_DOT_DASH_UL, ATTR_2DOT_DASH_UL,
    ATTR_FONTSIZE, ATTR_STD_FONTSIZE,
    ATTR_FONTFACE,
    ATTR_FOREGROUND, ATTR_BACKGROUND,
    ATTR_CAPS,
    ATTR_SMALLCAPS,
    ATTR_SHADOW,
    ATTR_OUTLINE,
    ATTR_EMBOSS,
    ATTR_ENGRAVE,
    ATTR_SUPER, ATTR_SUB,
    ATTR_STRIKE,
    ATTR_DBL_STRIKE,
    ATTR_EXPAND,
    /* ATTR_CONDENSE */
};

/* NOTE(review): no definition of these two is visible in attr.c; confirm
 * they are still needed. */
extern void attr_push_core (int attr, char* param);
extern void attr_pop_core (int attr);

/* Operations on the current (top) attribute stack. */
extern void attr_push(int attr, char* param);
extern int attr_pop(int attr);
extern int attr_read();
extern void attr_drop_all ();
extern void attr_pop_all();
extern void attr_pop_dump();

/* Operations on the stack of stacks (one stack per RTF brace group). */
extern void attrstack_push();
extern void attrstack_drop();
extern void attrstack_express_all();

#endif /* _ATTR */

45
programs/media/unrtf/bcount.c Executable file
View File

@ -0,0 +1,45 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Program name: bcount
* Author name: Zach Smith
* Create date: 15 Oct 00
* Purpose: Counts the number of opening and closing braces while
* reading from stdin.
*--------------------------------------------------------------------*/
#include <stdio.h>
/* Reads stdin to end of file, counting '{' and '}' characters, then
 * prints both totals on one line.  Always returns 0. */
int
main (void)
{
    int closing = 0;    /* number of '}' seen */
    int opening = 0;    /* number of '{' seen */
    int ch;

    while ((ch = getchar ()) != EOF) {
        if (ch == '}')
            ++closing;
        else if (ch == '{')
            ++opening;
    }

    printf ("{=%d, }=%d\n", opening, closing);
    return 0;
}

2880
programs/media/unrtf/convert.c Executable file

File diff suppressed because it is too large Load Diff

51
programs/media/unrtf/convert.h Executable file
View File

@ -0,0 +1,51 @@
/*===========================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
===========================================================================*/
/*----------------------------------------------------------------------
* Module name: convert
* Author name: Zach Smith
* Create date: 19 Sep 2001
* Purpose: Definitions for the conversion module
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Include guard: _CONVERT is defined at the end of this block. */
#ifndef _CONVERT
/* Character-set identifiers.
 * NOTE(review): presumably selected by the RTF character-set control
 * words handled in convert.c; confirm there. */
enum {
CHARSET_ANSI=1,
CHARSET_MAC,
CHARSET_CP437,
CHARSET_CP850
};
/* Pull in word.h for the Word type unless its guard macro is already set
 * (assumes word.h defines _WORD; verify against word.h). */
#ifndef _WORD
#include "word.h"
#endif
extern void word_print (Word*);
#define _CONVERT
#endif

66
programs/media/unrtf/defs.h Executable file
View File

@ -0,0 +1,66 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: defs.h
* Author name: Zach Smith
* Create date: 1 Sept 2000
* Purpose: Basic definitions plus externs for UnRTF
*----------------------------------------------------------------------
* Changes:
* 21 Oct 00, tuorfa@yahoo.com: moved program version to this file
* 08 Apr 01, tuorfa@yahoo.com: updated usage info.
* 08 Sep 01, tuorfa@yahoo.com: added PROGRAM_NAME.
* 19 Sep 01, tuorfa@yahoo.com: added PROGRAM_WEBSITE.
*--------------------------------------------------------------------*/
/* Program identification strings. */
#define PROGRAM_VERSION "0.18.1"
#define PROGRAM_NAME "UnRTF"
#define PROGRAM_WEBSITE "http://www.geocities.com/tuorfa"
/* Select the language for reporting of file creation/modification dates */
#define ENGLISH
/* Alternative languages, disabled by the #if 0. */
#if 0
#define FRANCAIS
#define ITALIANO
#endif
/* Boolean constants used throughout the program. */
#define TRUE (1)
#define FALSE (0)
/* Command-line synopsis, printed by usage() in error.c. */
#define USAGE "unrtf [--version] [--help] [--nopict|-n] [--html] [--text] [--vt] [--latex] [--ps] [--wpml] [-t html|text|vt|latex|ps|wpml] <filename>"
/* Default names for RTF's default fonts */
#define FONTNIL_STR "Times,TimesRoman,TimesNewRoman"
#define FONTROMAN_STR "Times,Palatino"
#define FONTSWISS_STR "Helvetica,Arial"
#define FONTMODERN_STR "Courier,Verdana"
#define FONTSCRIPT_STR "Cursive,ZapfChancery"
#define FONTDECOR_STR "ZapfChancery"
#define FONTTECH_STR "Symbol"

89
programs/media/unrtf/error.c Executable file
View File

@ -0,0 +1,89 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: error
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: Management of errors and warnings, when reporting
* the source code file/line is not necessary.
*----------------------------------------------------------------------
* Changes
* 10 Oct 00, tuorfa@yahoo.com: added usage()
* 15 Oct 00, tuorfa@yahoo.com: improved output readability
* 22 Sep 01, tuorfa@yahoo.com: removed mention of line number in handlers
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include "defs.h"
#include "main.h"
/*========================================================================
 * Name:	usage
 * Purpose:	Prints the command-line synopsis (USAGE) to stdout and
 *		terminates the program with exit status 0.
 * Args:	None.
 * Returns:	Does not return.
 *=======================================================================*/
void
usage ()
{
    printf ("Usage: %s\n", USAGE);
    exit (0);
}
/*========================================================================
 * Name:	error_handler
 * Purpose:	Reports a fatal error on stderr as "Error: <message>" and
 *		terminates the program with exit status 10.
 * Args:	Message.
 * Returns:	Does not return.
 *=======================================================================*/
void
error_handler (char* message)
{
    static const char prefix[] = "Error";

    fprintf (stderr, "%s: %s\n", prefix, message);
    exit (10);
}
/*========================================================================
 * Name:	warning_handler
 * Purpose:	Reports a non-fatal problem on stderr as
 *		"Warning: <message>" and lets the program continue.
 * Args:	Message.
 * Returns:	None.
 *=======================================================================*/
void
warning_handler (char* message)
{
    static const char prefix[] = "Warning";

    fprintf (stderr, "%s: %s\n", prefix, message);
}

45
programs/media/unrtf/error.h Executable file
View File

@ -0,0 +1,45 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: error.h
* Author name: Zach Smith
* Create date: 1 Sept 2000
* Purpose: Macros to be executed at the start of a function,
* when reporting source code file/line is useful.
*----------------------------------------------------------------------
* Changes
*--------------------------------------------------------------------*/
#include <stdio.h>	/* fprintf and stderr, used by the macros below */
#include <stdlib.h>	/* exit */
/* Both macros print an internal-error line naming the source file and line
 * to stderr and terminate with exit status 1 when XX is NULL.  They expand
 * to a brace block, so a following `;' is an extra empty statement: do not
 * use them as the unbraced body of an `if' that has an `else'. */
#define CHECK_PARAM_NOT_NULL(XX) { if ((XX)==NULL) { fprintf (stderr, "internal error: null pointer param in %s at %d\n", __FILE__, __LINE__); exit (1); }}
#define CHECK_MALLOC_SUCCESS(XX) { if ((XX)==NULL) { fprintf (stderr, "internal error: cannot allocate memory in %s at %d\n", __FILE__, __LINE__); exit (1); }}
/* Implemented in error.c. */
extern void usage(void);
extern void error_handler (char*);
extern void warning_handler (char*);

197
programs/media/unrtf/hash.c Executable file
View File

@ -0,0 +1,197 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: hash
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: Word-hash management. Words are put into a hash and an
* identifier is returned. This is used to save us from
* doing multiple mallocs for recurring strings such as
* 'the' and \par. This is not a big issue under Unix,
* but it is under other OSes and anyway, waste not want not.
*----------------------------------------------------------------------
* Changes:
* 08 Apr 01, tuorfa@yahoo.com: check for out of memory after malloc.
* 21 Apr 01, tuorfa@yahoo.com: signed to conversion unsigned bug
* 03 Aug 01, tuorfa@yahoo.com: fixes for using 16-bit compiler
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "error.h"
#include "main.h"
#include "malloc.h"
/* One stored word: a node in a bucket's singly linked list. */
typedef struct _hi {
/* Next item in the same bucket, or NULL. */
struct _hi *next;
/* The word itself (a my_strdup copy). */
char *str;
/* Word identifier: bucket byte in bits 24 and up, a running counter in
 * the low 24 bits (see hashitem_new). */
unsigned long value;
}
HashItem;
/* Index by first char of string */
static HashItem *hash[256];
/* Number of items stored in each bucket. */
static unsigned long hash_length[256];
/* Running counter that supplies the low 24 bits of each new identifier. */
static unsigned long hash_value=0;
/*========================================================================
 * Name:	hash_init
 * Purpose:	Resets all 256 buckets to empty and zeroes their item
 *		counts.  Items already stored are not freed.
 * Args:	None.
 * Returns:	None.
 *=======================================================================*/
void
hash_init ()
{
    int bucket = 0;

    while (bucket < 256) {
        hash [bucket] = NULL;
        hash_length [bucket] = 0;
        bucket++;
    }
}
/*========================================================================
 * Name:	hash_stats
 * Purpose:	Prints to stderr the number of words stored, summed over
 *		all buckets.
 * Args:	None.
 * Returns:	None.
 *=======================================================================*/
void
hash_stats ()
{
    unsigned long word_count = 0;
    int bucket = 0;

    while (bucket < 256)
        word_count += hash_length [bucket++];

    fprintf (stderr,"%lu words were hashed.\n", word_count);
}
/*========================================================================
 * Name:	hashitem_new
 * Purpose:	Creates a new linked list item for the hash table.
 * Args:	String.
 * Returns:	HashItem holding a copy of the string and a fresh identifier.
 * Notes:	The identifier carries the bucket byte in bits 24..31 and a
 *		running counter in the low 24 bits.  The bucket byte is
 *		chosen exactly as in hash_get_index: the first character, or
 *		the character after a leading backslash when there is one,
 *		always taken as an unsigned char.  hash_get_string relies on
 *		this to find the bucket again.  The counter wraps after
 *		0xffffff words.
 *		Calls error_handler ("out of memory") on allocation failure.
 *=======================================================================*/
static HashItem *
hashitem_new (char *str)
{
    HashItem *hi;
    unsigned long bucket;

    hi = (HashItem*) my_malloc (sizeof (HashItem));
    if (!hi)
        error_handler ("out of memory");
    memset ((void*) hi, 0, sizeof (HashItem));
    hi->str = my_strdup (str);

    /* Go through unsigned char so bytes >= 0x80 are not sign-extended
     * into the upper bits of the identifier. */
    bucket = (unsigned char) str[0];
    /* Skip the backslash of a control word, but keep it for a lone "\"
     * so the bucket byte is never the terminating NUL. */
    if (bucket == '\\' && str[1])
        bucket = (unsigned char) str[1];

    hi->value = (bucket << 24) | (hash_value++ & 0xffffff);
    hi->next = NULL;
#if 0
    if (debug_mode) {
        printf ("<!-- storing val %08lx str %s -->\n",
            hi->value, hi->str);
    }
#endif
    return hi;
}
/*========================================================================
 * Name:	hash_get_index
 * Purpose:	Given a string, returns the "index" i.e. the word identifier,
 *		storing the string first if it is not in the table yet.
 * Args:	String.
 * Returns:	Index.
 * Notes:	The bucket is selected by the first character, or by the
 *		character after a leading backslash when there is one.
 *=======================================================================*/
unsigned long
hash_get_index (char *str)
{
    unsigned short index;
    HashItem *hi;
    unsigned char ch;

    /* Read as unsigned char: where plain char is signed, a byte >= 0x80
     * would otherwise become an index far beyond the 256 buckets. */
    ch = (unsigned char) *str;
    if (ch == '\\' && *(str+1))
        ch = (unsigned char) *(str+1);
    index = ch;

    for (hi = hash[index]; hi; hi = hi->next) {
        if (!strcmp (hi->str, str))
            return hi->value;
    }

    /* not in hash: insert at the front of the bucket */
    hi = hashitem_new (str);
    hi->next = hash[index];
    hash [index] = hi;
    ++hash_length [index];
    return hi->value;
}
/*========================================================================
 * Name:    hash_get_string
 * Purpose: Given the index (word identifier) returns the word string.
 * Args:    Index, as returned by hash_get_index.
 * Returns: String, or NULL if not found (a warning is issued as well).
 *=======================================================================*/
char*
hash_get_string (unsigned long value)
{
    unsigned long index;
    HashItem *hi;

    /* Bits 24 and up select the bucket. Where unsigned long is wider than
     * 32 bits a bogus value could select a slot beyond the table, so the
     * range is checked before indexing. */
    index = value >> 24;
    if (index < 256) {
        for (hi = hash[index]; hi; hi = hi->next) {
            if (hi->value == value)
                return hi->str;
        }
    }
    warning_handler ("word not in hash");
    return NULL;
}

40
programs/media/unrtf/hash.h Executable file
View File

@ -0,0 +1,40 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: hash.h
* Author name: Zach Smith
* Create date: 1 Sept 2000
* Purpose: Definitions for the hash module.
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Clears every bucket head and per-bucket count of the hash table. */
extern void hash_init (void);
/* Prints the number of stored words to stderr. */
extern void hash_stats (void);
/* Returns the identifier of a word, storing the word first if it is new. */
extern unsigned long hash_get_index (char *);
/* Returns the word for an identifier, or NULL (after a warning) if absent. */
extern char* hash_get_string (unsigned long );

537
programs/media/unrtf/html.c Executable file
View File

@ -0,0 +1,537 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: html
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: HTML-specific output module
*----------------------------------------------------------------------
* Changes:
* 01 Aug 01, tuorfa@yahoo.com: code moved over from convert.c
* 03 Aug 01, tuorfa@yahoo.com: removed null entries to save space
* 08 Aug 01, tuorfa@yahoo.com, gommer@gmx.net: fixed/added some ANSI chars
* 18 Sep 01, tuorfa@yahoo.com: moved character sets into html.c etc
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* HTML spelling of the characters 0x20..0x7f; entry i stands for the
 * character 0x20 + i. The markup-significant characters & < > " are
 * written as entities. The double quote is escaped as well because
 * html_init wraps output in double-quoted attributes (href="...",
 * src="..."), where a raw quote would end the attribute early.
 * NOTE(review): which text reaches those attributes is decided by the
 * caller, which is not visible here; &quot; is valid in either context. */
static char* ascii [96] = {
/* 0x20 */ " ", "!", "&quot;", "#", "$", "%", "&amp;", "'",
/* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/",
/* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
/* 0x38 */ "8", "9", ":", ";", "&lt;", "=", "&gt;", "?",
/* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
/* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
/* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
/* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
/* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
/* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
/* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
/* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
};
/* HTML spelling of the ANSI characters 0x82..0xff; entry i stands for the
 * character 0x82 + i (html_init sets ansi_first_char to 0x82). NULL marks
 * a character without a translation. Every entity reference carries its
 * terminating ';' so that following text cannot be absorbed into it, and
 * only HTML 4 entity names are used (sbquo/bdquo rather than the
 * HTML5-only lsquor/ldquor). */
static char* ansi [] = {
/* 0x82 */ "&sbquo;", "&fnof;",
/* 0x84 */ "&bdquo;", "&hellip;", "&dagger;", "&Dagger;",
/* 0x88 */ "&circ;", "&permil;", "&Scaron;", "&lsaquo;",
/* 0x8c */ "&OElig;", NULL, NULL, NULL,
/* 0x90 */ NULL,"`","'","``","''","&bull;","&ndash;","&mdash;",
/* 0x98 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xa0 */ "&nbsp;","&iexcl;","&cent;","&pound;","&curren;","&yen;","&brvbar;","&sect;",
/* 0xa8 */ "&uml;","&copy;","&ordf;","&laquo;","&not;","&shy;","&reg;","&macr;",
/* 0xb0 */ "&deg;", "&plusmn;","&sup2;","&sup3;","&acute;","&micro;","&para;","&middot;",
/* 0xb8 */ "&cedil;","&sup1;", "&ordm;","&raquo;", "&frac14;", "&frac12;","&frac34;","&iquest;",
/* 0xc0 */ "&Agrave;","&Aacute;","&Acirc;","&Atilde;","&Auml;","&Aring;","&AElig;","&Ccedil;",
/* 0xc8 */ "&Egrave;","&Eacute;","&Ecirc;","&Euml;","&Igrave;","&Iacute;","&Icirc;","&Iuml;",
/* 0xd0 */ "&ETH;","&Ntilde;","&Ograve;","&Oacute;","&Ocirc;","&Otilde;","&Ouml;","&times;",
/* 0xd8 */ "&Oslash;","&Ugrave;","&Uacute;","&Ucirc;","&Uuml;","&Yacute;","&THORN;","&szlig;",
/* 0xe0 */ "&agrave;","&aacute;","&acirc;","&atilde;","&auml;","&aring;","&aelig;","&ccedil;",
/* 0xe8 */ "&egrave;","&eacute;","&ecirc;","&euml;","&igrave;","&iacute;","&icirc;","&iuml;",
/* 0xf0 */ "&eth;","&ntilde;","&ograve;","&oacute;","&ocirc;","&otilde;","&ouml;","&divide;",
/* 0xf8 */ "&oslash;","&ugrave;","&uacute;","&ucirc;","&uuml;","&yacute;","&thorn;","&yuml;",
};
/* HTML spelling of the Macintosh characters 0xa4..0xd5; entry i stands for
 * the character 0xa4 + i, matching mac_first_char/mac_last_char set in
 * html_init. The array is sized from that range so every code in it has a
 * slot (NULL = no translation); a shorter table would be read past its end.
 * Positions follow the Mac OS Roman code page: 0xa4 section sign,
 * 0xa5 bullet, 0xd0 en dash, 0xd1 em dash, 0xd2..0xd5 curly quotes. */
static char* mac [0xd5 - 0xa4 + 1] = {
/* 0xa4 */ "&sect;", "&bull;", NULL, NULL,
/* 0xa8 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xb0 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xb8 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xc0 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xc8 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xd0 */ "&ndash;","&mdash;","&ldquo;","&rdquo;","&lsquo;","&rsquo;",
};
/* HTML spelling of the IBM code page 437 characters 0x80..0xff; entry i
 * stands for the character 0x80 + i. Upper-case letters map to the
 * upper-case entities, 0xa7/0xac/0xaf follow the code page order
 * (masculine ordinal, one quarter, right guillemet), 0xf8 is the degree
 * sign, and every reference ends in ';' so that digits following it in
 * the text cannot change the referenced character. */
static char* cp437 [] = {
/* 0x80 */ "&Ccedil;",
/* 0x81 */ "&uuml;",
/* 0x82 */ "&eacute;",
/* 0x83 */ "&acirc;",
/* 0x84 */ "&auml;",
/* 0x85 */ "&agrave;",
/* 0x86 */ "&aring;",
/* 0x87 */ "&ccedil;",
/* 0x88 */ "&ecirc;",
/* 0x89 */ "&euml;",
/* 0x8a */ "&egrave;",
/* 0x8b */ "&iuml;",
/* 0x8c */ "&icirc;",
/* 0x8d */ "&igrave;",
/* 0x8e */ "&Auml;",
/* 0x8f */ "&Aring;",
/* 0x90 */ "&Eacute;",
/* 0x91 */ "&aelig;",
/* 0x92 */ "&AElig;",
/* 0x93 */ "&ocirc;",
/* 0x94 */ "&ouml;",
/* 0x95 */ "&ograve;",
/* 0x96 */ "&ucirc;",
/* 0x97 */ "&ugrave;",
/* 0x98 */ "&yuml;",
/* 0x99 */ "&Ouml;",
/* 0x9a */ "&Uuml;",
/* 0x9b */ "&cent;",
/* 0x9c */ "&pound;",
/* 0x9d */ "&yen;",
/* 0x9e */ "&#8359;", /* peseta */
/* 0x9f */ "&#402;", /* small f with hook */
/* 0xa0 */ "&aacute;",
/* 0xa1 */ "&iacute;",
/* 0xa2 */ "&oacute;",
/* 0xa3 */ "&uacute;",
/* 0xa4 */ "&ntilde;",
/* 0xa5 */ "&Ntilde;",
/* 0xa6 */ "&ordf;",
/* 0xa7 */ "&ordm;",
/* 0xa8 */ "&iquest;",
/* 0xa9 */ "&#8976;", /* reversed not */
/* 0xaa */ "&not;",
/* 0xab */ "&frac12;",
/* 0xac */ "&frac14;",
/* 0xad */ "&iexcl;",
/* 0xae */ "&laquo;",
/* 0xaf */ "&raquo;",
/* 0xb0 */ "&#9617;", /* light shade */
/* 0xb1 */ "&#9618;", /* med. shade */
/* 0xb2 */ "&#9619;", /* dark shade */
/* 0xb3 */ "&#9474;", /* box-draw light vert. */
/* 0xb4 */ "&#9508;", /* box-draw light vert. + lt. */
/* 0xb5 */ "&#9569;", /* box-draw vert. sgl. + lt. dbl. */
/* 0xb6 */ "&#9570;", /* box-draw vert. dbl. + lt. sgl. */
/* 0xb7 */ "&#9558;", /* box-draw dn. dbl. + lt. sgl. */
/* 0xb8 */ "&#9557;", /* box-draw dn. sgl. + lt. dbl. */
/* 0xb9 */ "&#9571;", /* box-draw dbl. vert. + lt. */
/* 0xba */ "&#9553;", /* box-draw dbl. vert. */
/* 0xbb */ "&#9559;", /* box-draw dbl. dn. + lt. */
/* 0xbc */ "&#9565;", /* box-draw dbl. up + lt. */
/* 0xbd */ "&#9564;", /* box-draw up dbl. + lt. sgl. */
/* 0xbe */ "&#9563;", /* box-draw up sgl. + lt. dbl. */
/* 0xbf */ "&#9488;", /* box-draw light dn. + lt. */
/* 0xc0 */ "&#9492;", /* box-draw light up + rt. */
/* 0xc1 */ "&#9524;", /* box-draw light up + horiz. */
/* 0xc2 */ "&#9516;", /* box-draw light dn. + horiz. */
/* 0xc3 */ "&#9500;", /* box-draw light vert. + rt. */
/* 0xc4 */ "&#9472;", /* box-draw light horiz. */
/* 0xc5 */ "&#9532;", /* box-draw light vert. + horiz. */
/* 0xc6 */ "&#9566;", /* box-draw vert. sgl. + rt. dbl. */
/* 0xc7 */ "&#9567;", /* box-draw vert. dbl. + rt. sgl. */
/* 0xc8 */ "&#9562;", /* box-draw dbl. up + rt. */
/* 0xc9 */ "&#9556;", /* box-draw dbl. dn. + rt. */
/* 0xca */ "&#9577;", /* box-draw dbl. up + horiz. */
/* 0xcb */ "&#9574;", /* box-draw dbl. dn. + horiz. */
/* 0xcc */ "&#9568;", /* box-draw dbl. vert. + rt. */
/* 0xcd */ "&#9552;", /* box-draw dbl. horiz. */
/* 0xce */ "&#9580;", /* box-draw dbl. vert. + horiz. */
/* 0xcf */ "&#9575;", /* box-draw up sgl. + horiz. dbl. */
/* 0xd0 */ "&#9576;", /* box-draw up dbl. + horiz. sgl. */
/* 0xd1 */ "&#9572;", /* box-draw dn. sgl. + horiz. dbl. */
/* 0xd2 */ "&#9573;", /* box-draw dn. dbl. + horiz. sgl. */
/* 0xd3 */ "&#9561;", /* box-draw up dbl. + rt. sgl. */
/* 0xd4 */ "&#9560;", /* box-draw up sgl. + rt. dbl. */
/* 0xd5 */ "&#9554;", /* box-draw dn. sgl. + rt. dbl. */
/* 0xd6 */ "&#9555;", /* box-draw dn. dbl. + rt. sgl. */
/* 0xd7 */ "&#9579;", /* box-draw vert. dbl. + horiz. sgl. */
/* 0xd8 */ "&#9578;", /* box-draw vert. sgl. + horiz. dbl. */
/* 0xd9 */ "&#9496;", /* box-draw light up + lt. */
/* 0xda */ "&#9484;", /* box-draw light dn. + rt. */
/* 0xdb */ "&#9608;", /* full block */
/* 0xdc */ "&#9604;", /* lower 1/2 block */
/* 0xdd */ "&#9612;", /* lt. 1/2 block */
/* 0xde */ "&#9616;", /* rt. 1/2 block */
/* 0xdf */ "&#9600;", /* upper 1/2 block */
/* 0xe0 */ "&#945;", /* greek small alpha */
/* 0xe1 */ "&szlig;",
/* 0xe2 */ "&#915;", /* greek cap gamma */
/* 0xe3 */ "&#960;", /* greek small pi */
/* 0xe4 */ "&#931;", /* greek cap sigma */
/* 0xe5 */ "&#963;", /* greek small sigma */
/* 0xe6 */ "&micro;",
/* 0xe7 */ "&#964;", /* greek small tau */
/* 0xe8 */ "&#934;", /* greek cap phi */
/* 0xe9 */ "&#920;", /* greek cap theta */
/* 0xea */ "&#937;", /* greek cap omega */
/* 0xeb */ "&#948;", /* greek small delta */
/* 0xec */ "&#8734;", /* inf. */
/* 0xed */ "&#966;", /* greek small phi */
/* 0xee */ "&#949;", /* greek small epsilon */
/* 0xef */ "&#8745;", /* intersect */
/* 0xf0 */ "&#8801;", /* identical */
/* 0xf1 */ "&plusmn;",
/* 0xf2 */ "&#8805;", /* greater-than or equal to */
/* 0xf3 */ "&#8804;", /* less-than or equal to */
/* 0xf4 */ "&#8992;", /* top 1/2 integral */
/* 0xf5 */ "&#8993;", /* bottom 1/2 integral */
/* 0xf6 */ "&divide;",
/* 0xf7 */ "&#8776;", /* almost = */
/* 0xf8 */ "&deg;",
/* 0xf9 */ "&#8729;", /* bullet op */
/* 0xfa */ "&middot;",
/* 0xfb */ "&#8730;", /* sqrt */
/* 0xfc */ "&#8319;", /* super-script small n */
/* 0xfd */ "&sup2;",
/* 0xfe */ "&#9632;", /* black square */
/* 0xff */ "&nbsp;",
};
/* HTML spelling of the IBM code page 850 characters 0x80..0xff; entry i
 * stands for the character 0x80 + i. Upper-case letters map to the
 * upper-case entities (the code page holds both cases of most accented
 * letters), 0xa7/0xac/0xaf follow the code page order (masculine ordinal,
 * one quarter, right guillemet) and 0xf8 is the degree sign. */
static char* cp850 [] = {
/* 0x80 */ "&Ccedil;",
/* 0x81 */ "&uuml;",
/* 0x82 */ "&eacute;",
/* 0x83 */ "&acirc;",
/* 0x84 */ "&auml;",
/* 0x85 */ "&agrave;",
/* 0x86 */ "&aring;",
/* 0x87 */ "&ccedil;",
/* 0x88 */ "&ecirc;",
/* 0x89 */ "&euml;",
/* 0x8a */ "&egrave;",
/* 0x8b */ "&iuml;",
/* 0x8c */ "&icirc;",
/* 0x8d */ "&igrave;",
/* 0x8e */ "&Auml;",
/* 0x8f */ "&Aring;",
/* 0x90 */ "&Eacute;",
/* 0x91 */ "&aelig;",
/* 0x92 */ "&AElig;",
/* 0x93 */ "&ocirc;",
/* 0x94 */ "&ouml;",
/* 0x95 */ "&ograve;",
/* 0x96 */ "&ucirc;",
/* 0x97 */ "&ugrave;",
/* 0x98 */ "&yuml;",
/* 0x99 */ "&Ouml;",
/* 0x9a */ "&Uuml;",
/* 0x9b */ "&oslash;",
/* 0x9c */ "&pound;",
/* 0x9d */ "&Oslash;",
/* 0x9e */ "&times;",
/* 0x9f */ "&#402;", /* small f with hook */
/* 0xa0 */ "&aacute;",
/* 0xa1 */ "&iacute;",
/* 0xa2 */ "&oacute;",
/* 0xa3 */ "&uacute;",
/* 0xa4 */ "&ntilde;",
/* 0xa5 */ "&Ntilde;",
/* 0xa6 */ "&ordf;",
/* 0xa7 */ "&ordm;",
/* 0xa8 */ "&iquest;",
/* 0xa9 */ "&reg;",
/* 0xaa */ "&not;",
/* 0xab */ "&frac12;",
/* 0xac */ "&frac14;",
/* 0xad */ "&iexcl;",
/* 0xae */ "&laquo;",
/* 0xaf */ "&raquo;",
/* 0xb0 */ "&#9617;", /* light shade */
/* 0xb1 */ "&#9618;", /* med. shade */
/* 0xb2 */ "&#9619;", /* dark shade */
/* 0xb3 */ "&#9474;", /* box-draw light vert. */
/* 0xb4 */ "&#9508;", /* box-draw light vert. + lt. */
/* 0xb5 */ "&Aacute;",
/* 0xb6 */ "&Acirc;",
/* 0xb7 */ "&Agrave;",
/* 0xb8 */ "&copy;",
/* 0xb9 */ "&#9571;", /* box-draw dbl. vert. + lt. */
/* 0xba */ "&#9553;", /* box-draw dbl. vert. */
/* 0xbb */ "&#9559;", /* box-draw dbl. dn. + lt. */
/* 0xbc */ "&#9565;", /* box-draw dbl. up + lt. */
/* 0xbd */ "&cent;",
/* 0xbe */ "&yen;",
/* 0xbf */ "&#9488;", /* box-draw light dn. + lt. */
/* 0xc0 */ "&#9492;", /* box-draw light up + rt. */
/* 0xc1 */ "&#9524;", /* box-draw light up + horiz. */
/* 0xc2 */ "&#9516;", /* box-draw light dn. + horiz. */
/* 0xc3 */ "&#9500;", /* box-draw light vert. + rt. */
/* 0xc4 */ "&#9472;", /* box-draw light horiz. */
/* 0xc5 */ "&#9532;", /* box-draw light vert. + horiz. */
/* 0xc6 */ "&atilde;",
/* 0xc7 */ "&Atilde;",
/* 0xc8 */ "&#9562;", /* box-draw dbl. up + rt. */
/* 0xc9 */ "&#9556;", /* box-draw dbl. dn. + rt. */
/* 0xca */ "&#9577;", /* box-draw dbl. up + horiz. */
/* 0xcb */ "&#9574;", /* box-draw dbl. dn. + horiz. */
/* 0xcc */ "&#9568;", /* box-draw dbl. vert. + rt. */
/* 0xcd */ "&#9552;", /* box-draw dbl. horiz. */
/* 0xce */ "&#9580;", /* box-draw dbl. vert. + horiz. */
/* 0xcf */ "&curren;",
/* 0xd0 */ "&eth;",
/* 0xd1 */ "&ETH;",
/* 0xd2 */ "&Ecirc;",
/* 0xd3 */ "&Euml;",
/* 0xd4 */ "&Egrave;",
/* 0xd5 */ "&#305;", /* small dotless i */
/* 0xd6 */ "&Iacute;",
/* 0xd7 */ "&Icirc;",
/* 0xd8 */ "&Iuml;",
/* 0xd9 */ "&#9496;", /* box-draw light up + lt. */
/* 0xda */ "&#9484;", /* box-draw light dn. + rt. */
/* 0xdb */ "&#9608;", /* full-block */
/* 0xdc */ "&#9604;", /* lower 1/2 block */
/* 0xdd */ "&brvbar;",
/* 0xde */ "&Igrave;",
/* 0xdf */ "&#9600;", /* upper 1/2 block */
/* 0xe0 */ "&Oacute;",
/* 0xe1 */ "&szlig;",
/* 0xe2 */ "&Ocirc;",
/* 0xe3 */ "&Ograve;",
/* 0xe4 */ "&otilde;",
/* 0xe5 */ "&Otilde;",
/* 0xe6 */ "&micro;",
/* 0xe7 */ "&thorn;",
/* 0xe8 */ "&THORN;",
/* 0xe9 */ "&Uacute;",
/* 0xea */ "&Ucirc;",
/* 0xeb */ "&Ugrave;",
/* 0xec */ "&yacute;",
/* 0xed */ "&Yacute;",
/* 0xee */ "&macr;",
/* 0xef */ "&acute;",
/* 0xf0 */ "&shy;",
/* 0xf1 */ "&plusmn;",
/* 0xf2 */ "&#8215;", /* dbl. lowline */
/* 0xf3 */ "&frac34;",
/* 0xf4 */ "&para;",
/* 0xf5 */ "&sect;",
/* 0xf6 */ "&divide;",
/* 0xf7 */ "&cedil;",
/* 0xf8 */ "&deg;",
/* 0xf9 */ "&uml;",
/* 0xfa */ "&middot;",
/* 0xfb */ "&sup1;",
/* 0xfc */ "&sup3;",
/* 0xfd */ "&sup2;",
/* 0xfe */ "&#9632;", /* black square */
/* 0xff */ "&nbsp;",
};
/*========================================================================
* Name: html_init
* Purpose: Generates the HTML output personality.
* Args: None.
* Returns: OutputPersonality.
*=======================================================================*/
OutputPersonality *
html_init (void)
{
OutputPersonality* op;
op = op_create();
op->comment_begin = "<!--- ";
op->comment_end = " --->\n";
op->document_begin = "<html>\n";
op->document_end = "</html>\n";
op->header_begin = "<head>\n";
op->header_end = "</head>\n";
op->document_title_begin = "<title>";
op->document_title_end = "</title>\n";
op->document_author_begin = "<!--author: ";
op->document_author_end = "--->\n";
op->document_changedate_begin = "<!--changed: ";
op->document_changedate_end = "--->\n";
op->body_begin = "<body>";
op->body_end = "</body>\n";
op->paragraph_begin = "<p>";
op->paragraph_end = "</p>\n";
op->center_begin = "<center>";
op->center_end = "</center>\n";
op->justify_begin = "<div align=justify>\n";
op->justify_end = "</div>\n";
op->align_left_begin = "<div align=left>\n";
op->align_left_end = "</div>\n";
op->align_right_begin = "<div align=right>\n";
op->align_right_end = "</div>\n";
op->forced_space = "&nbsp;";
op->line_break = "<br>\n";
op->page_break = "<p><hr><p>\n";
op->hyperlink_begin = "<a href=\"";
op->hyperlink_end = "\">hyperlink</a>";
op->imagelink_begin = "<img src=\"";
op->imagelink_end = "\">";
op->table_begin = "<table border=2>\n";
op->table_end = "</table>\n";
op->table_row_begin = "<tr>";
op->table_row_end = "</tr>\n";
op->table_cell_begin = "<td>";
op->table_cell_end = "</td>\n";
/* Character attributes */
op->font_begin = "<font face=\"%s\">";
op->font_end = "</font>";
op->fontsize_begin = "<span style=\"font-size:%spt\">";
op->fontsize_end = "</span>";
op->fontsize8_begin = "<font size=1>";
op->fontsize8_end = "</font>";
op->fontsize10_begin = "<font size=2>";
op->fontsize10_end = "</font>";
op->fontsize12_begin = "<font size=3>";
op->fontsize12_end = "</font>";
op->fontsize14_begin = "<font size=4>";
op->fontsize14_end = "</font>";
op->fontsize18_begin = "<font size=5>";
op->fontsize18_end = "</font>";
op->fontsize24_begin = "<font size=6>";
op->fontsize24_end = "</font>";
op->smaller_begin = "<small>";
op->smaller_end = "</small>";
op->bigger_begin = "<big>";
op->bigger_end = "</big>";
op->foreground_begin = "<font color=\"%s\">";
op->foreground_end = "</font>";
op->background_begin = "<span style=\"background:%s\">";
op->background_end = "</span>";
op->bold_begin = "<b>";
op->bold_end = "</b>";
op->italic_begin = "<i>";
op->italic_end = "</i>";
op->underline_begin = "<u>";
op->underline_end = "</u>";
op->dbl_underline_begin = "<u>";
op->dbl_underline_end = "</u>";
op->superscript_begin = "<sup>";
op->superscript_end = "</sup>";
op->subscript_begin = "<sub>";
op->subscript_end = "</sub>";
op->strikethru_begin = "<s>";
op->strikethru_end = "</s>";
op->dbl_strikethru_begin = "<s>";
op->dbl_strikethru_end = "</s>";
op->emboss_begin="<span style=\"background:gray\"><font color=black>";
op->emboss_end = "</font></span>";
op->engrave_begin = "<span style=\"background:gray\"><font color=navyblue>";
op->engrave_end = "</font></span>";
op->shadow_begin= "<span style=\"background:gray\">";
op->shadow_end= "</span>";
op->outline_begin= "<span style=\"background:gray\">";
op->outline_end= "</span>";
op->expand_begin = "<span style=\"letter-spacing: %s\">";
op->expand_end = "</span>";
op->pointlist_begin = "<ol>\n";
op->pointlist_end = "</ol>\n";
op->pointlist_item_begin = "<li>";
op->pointlist_item_end = "</li>\n";
op->numericlist_begin = "<ul>\n";
op->numericlist_end = "</ul>\n";
op->numericlist_item_begin = "<li>";
op->numericlist_item_end = "</li>\n";
op->simulate_small_caps = TRUE;
op->simulate_all_caps = TRUE;
op->simulate_word_underline = TRUE;
op->ascii_translation_table = ascii;
op->ansi_translation_table = ansi;
op->ansi_first_char = 0x82;
op->ansi_last_char = 0xff;
op->cp437_translation_table = cp437;
op->cp437_first_char = 0x80;
op->cp437_last_char = 0xff;
op->cp850_translation_table = cp850;
op->cp850_first_char = 0x80;
op->cp850_last_char = 0xff;
op->mac_translation_table = mac;
op->mac_first_char = 0xa4;
op->mac_last_char = 0xd5;
op->chars.right_quote = "'";
op->chars.left_quote = "`";
op->chars.right_dbl_quote = "''";
op->chars.left_dbl_quote = "``";
return op;
}

42
programs/media/unrtf/html.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: html
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Definitions for the HTML output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Include guard: the prototype is declared once per translation unit. */
#if !defined(_HTML)
#define _HTML
/* Builds the HTML output personality (defined in html.c). */
extern OutputPersonality *html_init (void);
#endif /* _HTML */

296
programs/media/unrtf/latex.c Executable file
View File

@ -0,0 +1,296 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: latex
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: LaTeX-specific output module
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
* 23 Sep 01, tuorfa@yahoo.com: fixed accented characters
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* LaTeX spelling of the characters 0x20..0x7f; entry i stands for the
 * character 0x20 + i. Characters that are special to LaTeX are replaced
 * by commands. Every backslash meant for LaTeX is doubled in the C
 * literal: a single one starts a C escape ("\a" is the BEL control
 * character). The asterisk uses the math symbol \ast inside $...$, like
 * the '|' entry does with \mid; backslash and caret use the LaTeX2e text
 * commands \textbackslash and \textasciicircum. */
static char* ascii [96] = {
/* 0x20 */ " ", "!", "''", "\\#", "{\\$}", "\\%", "\\&", "'",
/* 0x28 */ "(", ")", "$\\ast$", "+", ",", "-", ".", "/",
/* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
/* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
/* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
/* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
/* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
/* 0x58 */ "X", "Y", "Z", "[", "{\\textbackslash}", "]", "{\\textasciicircum}", "\\_",
/* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
/* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
/* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
/* 0x78 */ "x", "y", "z", "\\{", "$\\mid$", "\\}", "\\~{ }", "",
};
/* LaTeX spelling of the ANSI characters 0x82..0xff; entry i stands for the
 * character 0x82 + i. NULL marks a character without a translation and
 * "?" a character for which no LaTeX command is provided.
 *
 * Circumflex accents use \^{x} (\o is the letter o-slash, not an accent),
 * math-only symbols are wrapped in $...$ as the superscript entries
 * already are, and the Scandinavian letters, sharp s and section sign use
 * the standard text commands.
 * NOTE(review): \Y, \ETH, \THORN, \eth and \thorn are not defined by plain
 * LaTeX2e; they are kept as they were and need a preamble definition or a
 * replacement. */
static char* ansi [] = {
/* 0x82 */ "?", "?",
/* 0x84 */ "?", "{\\ldots}", "{\\dag}", "{\\ddag}",
/* 0x88 */ "?", "?", "?", "?",
/* 0x8c */ "{\\OE}", NULL, NULL, NULL,
/* 0x90 */ NULL,"`","'","``","''","{\\textbullet}","--","---",
/* 0x98 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
/* 0xa0 */ "~","?","?","{\\pounds}","?","\\Y","?","{\\S}",
/* 0xa8 */ "?","{\\copyright}","?","?","?","?","?","?",
/* 0xb0 */ "$^{\\circ}$", "$\\pm$","$^{2}$","$^{3}$","?","?","\\P ","?",
/* 0xb8 */ "?","$^{1}$", "?","?", "$\\frac{1}{4}$", "$\\frac{1}{2}$","$\\frac{3}{4}$",
           "?",
/* 0xc0 */ "\\`{A}","\\'{A}","\\^{A}",
           "\\~{A}","\\\"{A}","{\\AA}","\\AE ","\\c{C}",
/* 0xc8 */ "\\`{E}","\\'{E}","\\^{E}","\\\"{E}",
           "\\`{I}","\\'{I}","\\^{I}","\\\"{I}",
/* 0xd0 */ "\\ETH ","\\~{N}","\\`{O}","\\'{O}",
           "\\^{O}","\\~{O}","\\\"{O}","$\\times$",
/* 0xd8 */ "{\\O}","\\`{U}","\\'{U}","\\^{U}",
           "\\\"{U}","\\'{Y}","\\THORN","{\\ss}",
/* 0xe0 */ "\\`{a}","\\'{a}","\\^{a}",
           "\\~{a}","\\\"{a}","{\\aa}","\\ae ","\\c{c}",
/* 0xe8 */ "\\`{e}","\\'{e}","\\^{e}","\\\"{e}",
           "\\`{i}","\\'{i}","\\^{i}","\\\"{i}",
/* 0xf0 */ "\\eth ","\\~{n}","\\`{o}","\\'{o}",
           "\\^{o}","\\~{o}","\\\"{o}","$\\div$",
/* 0xf8 */ "{\\o}","\\`{u}","\\'{u}","\\^{u}",
           "\\\"{u}","\\'{y}","\\thorn ","\\\"{y}",
};
/* Placeholder tables for the Macintosh, CP437 and CP850 character sets:
 * each one holds the single entry "?". */
static char *mac[1]   = { "?" };
static char *cp437[1] = { "?" };
static char *cp850[1] = { "?" };
/*========================================================================
* Name: latex_init
* Purpose: Generates the output personality for LaTeX.
* Args: None.
* Returns: OutputPersonality.
*=======================================================================*/
OutputPersonality *
latex_init (void)
{
OutputPersonality* op;
op = op_create();
op->comment_begin = "%% ";
op->comment_end = "\n";
op->document_begin = "\\documentclass[11pt]{article}\n";
op->document_end = "\\end{document}";
op->header_begin = "";
op->header_end = "";
op->document_title_begin = "\\title{";
op->document_title_end = "}\n";
op->document_author_begin = "\\author{";
op->document_author_end = "}\n";
op->document_changedate_begin = "\\date{";
op->document_changedate_end = "}\n";
op->body_begin = "\n\n\\begin{document}\n\\maketitle\n";
op->body_end = "\\end{document}\n";
op->paragraph_begin = "\\par\n";
op->paragraph_end = "";
op->center_begin = "\\center{\n";
op->center_end = "}\n";
op->justify_begin = "";
op->justify_end = "";
op->align_left_begin = "\\begin{sloppy}\n";
op->align_left_end = "\\end{sloppy}\n";
op->align_right_begin = "";
op->align_right_end = "";
op->forced_space = "";
op->line_break = "\\par\n";
op->page_break = "\\pagebreak ";
op->hyperlink_begin = "";
op->hyperlink_end = "";
op->imagelink_begin = "";
op->imagelink_end = "";
op->table_begin = "\\begin{tabular}{|lllll}\n";
op->table_end = "\n\\end{tabular}\n";
op->table_row_begin = "";
op->table_row_end = "\\hline \\\\\n";
op->table_cell_begin = "";
op->table_cell_end = " & ";
/* Character attributes */
op->font_begin = "";
op->font_end = "";
op->fontsize_begin = "";
op->fontsize_end = "";
op->fontsize8_begin = "\\tiny{";
op->fontsize8_end = "}";
op->fontsize10_begin = "\\small{";
op->fontsize10_end = "}";
op->fontsize12_begin = "\\normalsize{";
op->fontsize12_end = "}";
op->fontsize14_begin = "{\\large ";
op->fontsize14_end = "}";
op->fontsize18_begin = "{\\Large ";
op->fontsize18_end = "}";
op->fontsize24_begin = "{\\LARGE ";
op->fontsize24_end = "}";
op->fontsize36_begin = "{\\huge ";
op->fontsize36_end = "}";
op->fontsize48_begin = "{\\Huge ";
op->fontsize48_end = "}";
op->smaller_begin = "";
op->smaller_end = "";
op->bigger_begin = "";
op->bigger_end = "";
op->foreground_begin = "";
op->foreground_end = "";
op->background_begin = "";
op->background_end = "";
op->bold_begin = "{\\bf ";
op->bold_end = "}";
op->italic_begin = "{\\it ";
op->italic_end = "}";
op->underline_begin = "";
op->underline_end = "\n";
op->dbl_underline_begin = "{\\ul ";
op->dbl_underline_end = "}";
op->pointlist_begin = "\\begin{itemize}\n";
op->pointlist_end = "\\end{itemize}\n";
op->pointlist_item_begin = "\\item ";
op->pointlist_item_end = "";
op->numericlist_begin = "\\begin{enumerate}\n";
op->numericlist_end = "\\end{enumerate}\n";
op->numericlist_item_begin = "\\item ";
op->numericlist_item_end = "";
op->superscript_begin = "$^{";
op->superscript_end = "}$";
op->subscript_begin = "$_{";
op->subscript_end = "}$";
op->strikethru_begin = "{";
op->strikethru_end = "}";
op->dbl_strikethru_begin = "{";
op->dbl_strikethru_end = "}";
op->emboss_begin="";
op->emboss_end = "";
op->engrave_begin = "";
op->engrave_end = "";
op->shadow_begin= "";
op->shadow_end= "";
op->small_caps_begin= "\\textsc{";
op->small_caps_end= "}";
op->outline_begin= "";
op->outline_end= "";
op->expand_begin = "";
op->expand_end = "";
op->simulate_small_caps = FALSE;
op->simulate_all_caps = TRUE;
op->simulate_word_underline = TRUE;
op->ascii_translation_table = ascii;
op->ansi_translation_table = ansi;
op->ansi_first_char = 0x80;
op->ansi_last_char = 0x80;
op->cp437_translation_table = cp437;
op->cp437_first_char = 0x80;
op->cp437_last_char = 0x80;
op->cp850_translation_table = cp850;
op->cp850_first_char = 0x80;
op->cp850_last_char = 0x80;
op->mac_translation_table = mac;
op->mac_first_char = 0x80;
op->mac_last_char = 0x80;
op->chars.right_quote = "'";
op->chars.left_quote = "`";
op->chars.right_dbl_quote = "''";
op->chars.left_dbl_quote = "``";
return op;
}

42
programs/media/unrtf/latex.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: latex
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Definitions for the LaTeX output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Include guard: the prototype is declared once per translation unit. */
#if !defined(_LATEX)
#define _LATEX
/* Builds the LaTeX output personality (defined in latex.c). */
extern OutputPersonality *latex_init (void);
#endif /* _LATEX */

221
programs/media/unrtf/main.c Executable file
View File

@ -0,0 +1,221 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: main.c
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: main() routine with file open/close.
*----------------------------------------------------------------------
* Changes:
* 14 Oct 00, tuorfa@yahoo.com: added -nopict option
* 15 Oct 00, tuorfa@yahoo.com: added verify_file_type()
* 08 Apr 01, tuorfa@yahoo.com: more GNU-like switches implemented
* 24 Jul 01, tuorfa@yahoo.com: removed verify_file_type()
* 03 Aug 01, tuorfa@yahoo.com: added --inline switch
* 08 Sep 01, tuorfa@yahoo.com: added use of PROGRAM_NAME
* 19 Sep 01, tuorfa@yahoo.com: addition of output personalities
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
* 23 Sep 01, tuorfa@yahoo.com: added wpml switch
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "defs.h"
#include "error.h"
#include "word.h"
#include "convert.h"
#include "parse.h"
#include "hash.h"
#include "malloc.h"
#include "output.h"
#include "html.h"
#include "text.h"
#include "vt.h"
#include "ps.h"
#include "latex.h"
#include "wpml.h"
int nopict_mode; /* TRUE => do not write \pict's to files */
int dump_mode; /* TRUE => output a dump of the word tree */
int debug_mode; /* TRUE => output comments within HTML */
int lineno; /* used for error reporting */
int simple_mode; /* TRUE => output HTML without SPAN/DIV tags */
int inline_mode; /* TRUE => output HTML without HTML/BODY/HEAD */
OutputPersonality *op;
enum {
OP_HTML, OP_TEXT, OP_LATEX, OP_PS, OP_VT, OP_WPML
};
/*========================================================================
 * Name:	main
 * Purpose:	Main control function. Parses the command line, selects
 *		an output personality, reads the input file into a tree
 *		of Word objects and then either dumps or converts it.
 * Args:	Argument count and argument vector.
 * Returns:	Exit code (0 when the end of the function is reached).
 *=======================================================================*/
int
main (int argc, char **argv)
{
    FILE *f;
    Word *word;
    char *path = NULL;
    int i;
    int output_format = OP_HTML;

    /* simple_mode is reset here too so that all option flags start
     * from the same explicit state.
     */
    nopict_mode = debug_mode = dump_mode = inline_mode = simple_mode = FALSE;

    if (argc<2 || argc>7) usage();

    for (i=1; i<argc; i++) {
        if (!strcmp("--dump",argv[i])) dump_mode=TRUE;
        else if (!strcmp("-d",argv[i])) dump_mode=TRUE;
        else if (!strcmp("--debug",argv[i])) debug_mode=TRUE;
        else if (!strcmp("--simple",argv[i])) simple_mode=TRUE;
        else if (!strcmp("--html",argv[i])) output_format=OP_HTML;
        else if (!strcmp("--text",argv[i])) output_format=OP_TEXT;
        else if (!strcmp("--vt",argv[i])) output_format=OP_VT;
        else if (!strcmp("--ps",argv[i])) output_format=OP_PS;
        else if (!strcmp("--latex",argv[i])) output_format=OP_LATEX;
        else if (!strcmp("--wpml",argv[i])) output_format=OP_WPML;
        else if (!strcmp("-t",argv[i])) {
            /* "-t <type>": the type name is consumed only when it
             * does not look like another switch; an unrecognized
             * name leaves the format unchanged.
             */
            if ((i+1)<argc && *argv[i+1]!='-') {
                i++;
                if (!strcmp ("html", argv[i]))
                    output_format=OP_HTML;
                else if (!strcmp ("vt", argv[i]))
                    output_format=OP_VT;
                else if (!strcmp ("text", argv[i]))
                    output_format=OP_TEXT;
                else if (!strcmp ("ps", argv[i]))
                    output_format=OP_PS;
                else if (!strcmp ("latex", argv[i]))
                    output_format=OP_LATEX;
                else if (!strcmp ("wpml", argv[i]))
                    output_format=OP_WPML;
            }
        }
        else if (!strcmp("--inline",argv[i])) inline_mode=TRUE;
        else if (!strcmp("--help",argv[i])) {
            usage();
            exit (0);
        }
        else if (!strcmp("--version",argv[i])) {
            fprintf (stderr, "%s\n", PROGRAM_VERSION);
            exit (0);
        }
        else if (!strcmp("--nopict",argv[i])) nopict_mode=TRUE;
        else if (!strcmp("-n",argv[i])) nopict_mode=TRUE;
        else {
            /* Anything else is the input path; unknown switches and
             * a second path are usage errors.
             */
            if (*argv[i]=='-') usage();
            if (path)
                usage();
            else
                path=argv[i];
        }
    }

    if (!path) usage();

    switch (output_format) {
    case OP_TEXT:
        op = text_init();
        break;
    case OP_VT:
        op = vt_init();
        break;
    case OP_HTML:
        op = html_init();
        break;
    case OP_PS:
        op = ps_init();
        break;
    case OP_LATEX:
        op = latex_init();
        break;
    case OP_WPML:
        op = wpml_init();
        break;
    default:
        error_handler ("unknown output format");
    }

    hash_init();

    fprintf (stderr, "This is %s, ", PROGRAM_NAME);
    fprintf (stderr, "version %s\n", PROGRAM_VERSION);
    fprintf (stderr, "By Zach T. Smith\n");

    if (debug_mode) fprintf (stderr, "Debug mode.\n");
    if (dump_mode) fprintf (stderr, "Dump mode.\n");

    f = fopen (path, "r");
    if (!f) {
        /* Retry with ".rtf" appended. The length is checked first so
         * that an over-long path can no longer overflow path2; such a
         * path is simply reported as unopenable.
         */
        char path2[200];

        if (strlen (path) + sizeof ".rtf" <= sizeof path2) {
            strcpy (path2, path);
            strcat (path2, ".rtf");
            f = fopen (path2, "r");
        }
        if (!f)
            error_handler ("cannot open input file");
    }

    fprintf (stderr, "Processing %s...\n", path);

    lineno=0;

    word = word_read (f);

    if (dump_mode) {
        word_dump (word);
        printf ("\n");
    } else {
        word_print (word);
    }

    fclose (f);

    fprintf (stderr, "Done.\n");

    hash_stats();

    if (debug_mode) {
        /* total_malloced() returns unsigned long, hence %lu. */
        fprintf (stderr, "Total memory allocated %lu bytes.\n",
                 total_malloced());
    }

    /* May as well */
    word_free (word);

    return 0;
}

48
programs/media/unrtf/main.h Executable file
View File

@ -0,0 +1,48 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: main.h
* Author name: Zach Smith
* Create date: 1 Sept 2000
* Purpose: Externs for main.c.
*----------------------------------------------------------------------
* Changes:
* 15 Oct 00, tuorfa@yahoo.com: removed echo_mode extern
* 19 Sep 01, tuorfa@yahoo.com: added output personality
*--------------------------------------------------------------------*/
extern int lineno;
extern int debug_mode;
extern int simple_mode;
extern int inline_mode;
#ifndef _OUTPUT
#include "output.h"
#endif
extern OutputPersonality *op;

125
programs/media/unrtf/malloc.c Executable file
View File

@ -0,0 +1,125 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: malloc
* Author name: Zach Smith
* Create date: 01 Aug 01
* Purpose: Memory management. Allows us to keep track of how
* much memory is being used.
*----------------------------------------------------------------------
* Changes:
* 14 Aug 01, tuorfa@yahoo.com: added Turbo C support.
* 16 Aug 01, Lars Unger <l.unger@tu-bs.de>: added Amiga/GCC support.
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
* 28 Sep 01, tuorfa@yahoo.com: removed Turbo C support.
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#if AMIGA
#include <stdlib.h>
#else
#include <malloc.h>
#endif
#include "error.h"
/* Running total of the sizes of all successful my_malloc() requests. */
static unsigned long count = 0;
/*========================================================================
 * Name:	my_malloc
 * Purpose:	Wrapper around malloc() that also adds the requested size
 *		to the running total when the allocation succeeds.
 * Args:	Number of bytes wanted.
 * Returns:	Pointer to the new storage, or NULL if malloc() failed.
 *=======================================================================*/
char *
my_malloc (unsigned long size)
{
    char *block = malloc (size);

    if (block != NULL)
        count += size;

    return block;
}
/*========================================================================
 * Name:	my_free
 * Purpose:	Counterpart of my_malloc(): validates the pointer with
 *		CHECK_PARAM_NOT_NULL and hands it to free(). The running
 *		allocation total is left untouched.
 * Args:	Pointer previously obtained from my_malloc().
 * Returns:	None.
 *=======================================================================*/
void
my_free (char *ptr)
{
    CHECK_PARAM_NOT_NULL(ptr);

    free (ptr);
}
/*========================================================================
 * Name:	total_malloced
 * Purpose:	Reports the running total of bytes requested successfully
 *		through my_malloc() so far.
 * Args:	None.
 * Returns:	The total, in bytes.
 *=======================================================================*/
unsigned long
total_malloced (void)
{
    return count;
}
/*========================================================================
 * Name:	my_strdup
 * Purpose:	Duplicates a string into storage obtained from my_malloc()
 *		so that the copy is included in the allocation total.
 * Args:	NUL-terminated string to copy.
 * Returns:	Newly allocated copy; error_handler() is called when the
 *		allocation fails.
 *=======================================================================*/
char *
my_strdup (char *src)
{
    unsigned long nbytes;
    char *copy;

    CHECK_PARAM_NOT_NULL(src);

    /* Size of the copy, terminating NUL included. */
    nbytes = strlen (src) + 1;

    copy = my_malloc (nbytes);
    if (!copy)
        error_handler ("out of memory in strdup()");

    memcpy (copy, src, nbytes);
    return copy;
}

38
programs/media/unrtf/malloc.h Executable file
View File

@ -0,0 +1,38 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: malloc
* Author name: Zach Smith
* Create date: 1 Aug 2001
* Purpose: Definitions for memory management.
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* malloc() wrapper; on success the requested size is added to a running
 * total. Returns NULL when malloc() fails.
 */
extern char * my_malloc (unsigned long);
/* free() wrapper; the pointer is checked with CHECK_PARAM_NOT_NULL first.
 * The running total is not reduced.
 */
extern void my_free (char*);
/* Returns the running total of bytes requested through my_malloc(). */
extern unsigned long total_malloced (void);
/* Copies a string into my_malloc() storage; calls error_handler() when
 * the allocation fails.
 */
extern char * my_strdup (char*);

400
programs/media/unrtf/output.c Executable file
View File

@ -0,0 +1,400 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: output
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: Generalized output module
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: addition of functions to change font size
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "output.h"
#include "main.h"
#include "convert.h"
/*========================================================================
 * Name:	op_create
 * Purpose:	Creates a blank output personality: the structure is
 *		allocated with my_malloc() and every byte is set to zero,
 *		so callers only need to fill in the fields they support.
 * Args:	None.
 * Returns:	Output personality struct; error_handler() is called when
 *		the allocation fails.
 *=======================================================================*/
OutputPersonality*
op_create (void)
{
    OutputPersonality *new_op;

    new_op = (OutputPersonality*) my_malloc (sizeof *new_op);
    if (!new_op)
        error_handler ("cannot allocate output personality");

    /* memset() is standard C and declared by <string.h>, which this
     * file already includes; the legacy bzero() is not guaranteed to
     * be declared there.
     */
    memset (new_op, 0, sizeof *new_op);

    return new_op;
}
/*========================================================================
 * Name:	op_free
 * Purpose:	Releases the OutputPersonality structure itself. The
 *		strings it refers to are left alone because they are
 *		usually constants.
 * Args:	OutputPersonality.
 * Returns:	None.
 *=======================================================================*/
void
op_free (OutputPersonality *op)
{
    CHECK_PARAM_NOT_NULL(op);

    my_free ((char*) op);
}
/*========================================================================
 * Name:	op_translate_char
 * Purpose:	Looks up the output string for a character in the context
 *		of a given output personality. Characters 0x20..0x7f use
 *		the ASCII table whatever the character set; all others use
 *		the table belonging to the requested character set.
 * Args:	OutputPersonality, character set#, character.
 * Returns:	String, or NULL when the character lies outside the range
 *		covered by the selected table.
 *=======================================================================*/
char *
op_translate_char (OutputPersonality *op, int charset, int ch)
{
    char **table;
    short first;
    short last;

    CHECK_PARAM_NOT_NULL(op);

    /* The ASCII table starts at the space character. */
    if (ch >= 0x20 && ch < 0x80)
        return op->ascii_translation_table [ch - 0x20];

    /* Pick the table and its covered range for this character set. */
    switch (charset) {
    case CHARSET_ANSI:
        table = op->ansi_translation_table;
        first = op->ansi_first_char;
        last  = op->ansi_last_char;
        break;
    case CHARSET_MAC:
        table = op->mac_translation_table;
        first = op->mac_first_char;
        last  = op->mac_last_char;
        break;
    case CHARSET_CP437:
        table = op->cp437_translation_table;
        first = op->cp437_first_char;
        last  = op->cp437_last_char;
        break;
    case CHARSET_CP850:
        table = op->cp850_translation_table;
        first = op->cp850_first_char;
        last  = op->cp850_last_char;
        break;
    default:
        error_handler ("invalid character set value, cannot translate character");
        return NULL;
    }

    if (ch < first || ch > last)
        return NULL;

    return table [ch - first];
}
/*========================================================================
 * Name:	op_begin_std_fontsize
 * Purpose:	Prints whatever is necessary to begin a change in the
 *		current font size. Preference order: the expression for
 *		an exactly matching standard size, then the generic
 *		expression with the size substituted, then the nearest
 *		available standard size, then anything near 12 point.
 * Args:	OutputPersonality, desired size.
 * Returns:	None.
 *=======================================================================*/
void
op_begin_std_fontsize (OutputPersonality *op, int size)
{
    /* The standard sizes, smallest first. */
    static const int std_size[8] = { 8, 10, 12, 14, 18, 24, 36, 48 };
    /* A request below approx_below[i] may be approximated by
     * std_size[i]; the largest size is handled separately.
     */
    static const int approx_below[7] = { 9, 11, 13, 16, 21, 30, 42 };
    /* Last-resort order, as indices into std_size: 12, 14, 10, 18, 8. */
    static const int near_12pt[5] = { 2, 3, 1, 4, 0 };
    char *std_expr[8];
    char *chosen = NULL;
    int i;

    CHECK_PARAM_NOT_NULL(op);

    std_expr[0] = op->fontsize8_begin;
    std_expr[1] = op->fontsize10_begin;
    std_expr[2] = op->fontsize12_begin;
    std_expr[3] = op->fontsize14_begin;
    std_expr[4] = op->fontsize18_begin;
    std_expr[5] = op->fontsize24_begin;
    std_expr[6] = op->fontsize36_begin;
    std_expr[7] = op->fontsize48_begin;

    /* Look for an exact match with a standard point size.
     */
    for (i = 0; i < 8; i++) {
        if (size != std_size[i])
            continue;
        if (std_expr[i]) {
            printf (std_expr[i]);
            return;
        }
        break;
    }

    /* If no exact match, try to write out a change to the
     * exact point size.
     */
    if (op->fontsize_begin) {
        char expr[16];

        sprintf (expr, "%d", size);
        printf (op->fontsize_begin, expr);
        return;
    }

    /* If we cannot write out a change for the exact point size,
     * we must approximate to a standard size.
     */
    for (i = 0; i < 7 && !chosen; i++) {
        if (size < approx_below[i] && std_expr[i])
            chosen = std_expr[i];
    }
    if (!chosen && size > 40 && std_expr[7])
        chosen = std_expr[7];

    /* If we can't even produce a good approximation,
     * just try to get a font size near 12 point.
     */
    for (i = 0; i < 5 && !chosen; i++)
        chosen = std_expr[near_12pt[i]];

    if (chosen)
        printf (chosen);
    else
        error_handler ("output personality lacks sufficient font size change capability");
}
/*========================================================================
 * Name:	op_end_std_fontsize
 * Purpose:	Prints whatever is necessary to end a change in the
 *		current font size. Preference order: the expression for
 *		an exactly matching standard size, then the generic
 *		expression with the size substituted, then the nearest
 *		available standard size, then anything near 12 point.
 * Args:	OutputPersonality, desired size.
 * Returns:	None.
 *=======================================================================*/
void
op_end_std_fontsize (OutputPersonality *op, int size)
{
    /* The standard sizes, smallest first. */
    static const int std_size[8] = { 8, 10, 12, 14, 18, 24, 36, 48 };
    /* A request below approx_below[i] may be approximated by
     * std_size[i]; the largest size is handled separately.
     */
    static const int approx_below[7] = { 9, 11, 13, 16, 21, 30, 42 };
    /* Last-resort order, as indices into std_size: 12, 14, 10, 18, 8. */
    static const int near_12pt[5] = { 2, 3, 1, 4, 0 };
    char *std_expr[8];
    char *chosen = NULL;
    int i;

    CHECK_PARAM_NOT_NULL(op);

    std_expr[0] = op->fontsize8_end;
    std_expr[1] = op->fontsize10_end;
    std_expr[2] = op->fontsize12_end;
    std_expr[3] = op->fontsize14_end;
    std_expr[4] = op->fontsize18_end;
    std_expr[5] = op->fontsize24_end;
    std_expr[6] = op->fontsize36_end;
    std_expr[7] = op->fontsize48_end;

    /* Look for an exact match with a standard point size.
     */
    for (i = 0; i < 8; i++) {
        if (size != std_size[i])
            continue;
        if (std_expr[i]) {
            printf (std_expr[i]);
            return;
        }
        break;
    }

    /* If no exact match, try to write out a change to the
     * exact point size.
     */
    if (op->fontsize_end) {
        char expr[16];

        sprintf (expr, "%d", size);
        printf (op->fontsize_end, expr);
        return;
    }

    /* If we cannot write out a change for the exact point size,
     * we must approximate to a standard size.
     */
    for (i = 0; i < 7 && !chosen; i++) {
        if (size < approx_below[i] && std_expr[i])
            chosen = std_expr[i];
    }
    if (!chosen && size > 40 && std_expr[7])
        chosen = std_expr[7];

    /* If we can't even produce a good approximation,
     * just try to get a font size near 12 point.
     */
    for (i = 0; i < 5 && !chosen; i++)
        chosen = std_expr[near_12pt[i]];

    if (chosen)
        printf (chosen);
    else
        error_handler ("output personality lacks sufficient font size change capability");
}

258
programs/media/unrtf/output.h Executable file
View File

@ -0,0 +1,258 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: output
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: Definitions for the generalized output module
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
#ifndef _OUTPUT
/* Description of one output format: a collection of strings that are
 * emitted around document constructs, plus per-character-set translation
 * tables. op_create() (output.c) returns an instance with every byte
 * zeroed; the code in output.c treats a NULL string as "not provided".
 */
typedef struct {
char *comment_begin;
char *comment_end;
char *document_begin;
char *document_end;
char *header_begin;
char *header_end;
char *document_title_begin;
char *document_title_end;
char *document_keywords_begin;
char *document_keywords_end;
char *document_author_begin;
char *document_author_end;
char *document_changedate_begin;
char *document_changedate_end;
char *body_begin;
char *body_end;
char *word_begin;
char *word_end;
char *paragraph_begin;
char *paragraph_end;
char *center_begin;
char *center_end;
char *align_left_begin;
char *align_left_end;
char *align_right_begin;
char *align_right_end;
char *justify_begin;
char *justify_end;
char *forced_space;
char *line_break;
char *page_break;
char *hyperlink_begin;
char *hyperlink_end;
char *imagelink_begin;
char *imagelink_end;
char *table_begin;
char *table_end;
char *table_row_begin;
char *table_row_end;
char *table_cell_begin;
char *table_cell_end;
/* Character attributes */
char *font_begin;
char *font_end;
/* Generic font size change. op_begin_std_fontsize() and
 * op_end_std_fontsize() pass these to printf() as the format string
 * with a single string argument holding the point size in decimal.
 */
char *fontsize_begin;
char *fontsize_end;
/* standard font sizes are optional */
/* When present, these are passed to printf() as the format string with
 * no further arguments; they are tried before the generic pair above
 * when the requested size matches exactly.
 */
char *fontsize8_begin;
char *fontsize8_end;
char *fontsize10_begin;
char *fontsize10_end;
char *fontsize12_begin;
char *fontsize12_end;
char *fontsize14_begin;
char *fontsize14_end;
char *fontsize18_begin;
char *fontsize18_end;
char *fontsize24_begin;
char *fontsize24_end;
char *fontsize36_begin;
char *fontsize36_end;
char *fontsize48_begin;
char *fontsize48_end;
char *smaller_begin;
char *smaller_end;
char *bigger_begin;
char *bigger_end;
char *foreground_begin;
char *foreground_end;
char *background_begin;
char *background_end;
char *bold_begin;
char *bold_end;
char *italic_begin;
char *italic_end;
char *underline_begin;
char *underline_end;
char *dbl_underline_begin;
char *dbl_underline_end;
char *superscript_begin;
char *superscript_end;
char *subscript_begin;
char *subscript_end;
char *strikethru_begin;
char *strikethru_end;
char *dbl_strikethru_begin;
char *dbl_strikethru_end;
char *emboss_begin;
char *emboss_end;
char *engrave_begin;
char *engrave_end;
char *shadow_begin;
char *shadow_end;
char *outline_begin;
char *outline_end;
char *small_caps_begin;
char *small_caps_end;
char *pointlist_begin;
char *pointlist_end;
char *pointlist_item_begin;
char *pointlist_item_end;
char *numericlist_begin;
char *numericlist_end;
char *numericlist_item_begin;
char *numericlist_item_end;
char *expand_begin;
char *expand_end;
char *toc_entry_begin;
char *toc_entry_end;
char *index_entry_begin;
char *index_entry_end;
/* XX These should really be replaced by references
* to one of the charsets.
*/
struct {
char *bullet;
char *left_quote;
char *right_quote;
char *left_dbl_quote;
char *right_dbl_quote;
char *nonbreaking_space;
char *emdash;
char *endash;
char *lessthan;
char *greaterthan;
char *amp;
char *copyright;
char *trademark;
char *nonbreaking_hyphen;
char *optional_hyphen;
} chars;
/* Used by op_translate_char() for characters 0x20..0x7f, indexed by
 * (character - 0x20), regardless of the character set.
 */
char **ascii_translation_table;
/* NOTE(review): these are 1-bit bit-fields of plain int, whose
 * signedness is implementation-defined, so a stored 1 may read back
 * as -1. Test them for non-zero rather than comparing with TRUE.
 */
int simulate_small_caps : 1;
int simulate_all_caps : 1;
int simulate_word_underline : 1;
/* Per-character-set tables. op_translate_char() indexes each one by
 * (character - <set>_first_char) and only for characters in the
 * inclusive range <set>_first_char .. <set>_last_char.
 */
char **ansi_translation_table;
short ansi_first_char;
short ansi_last_char;
char **cp437_translation_table;
short cp437_first_char;
short cp437_last_char;
char **cp850_translation_table;
short cp850_first_char;
short cp850_last_char;
char **mac_translation_table;
short mac_first_char;
short mac_last_char;
/* Callback taking three ints - presumably the red, green and blue
 * components of the foreground colour; TODO confirm against callers.
 */
void (*write_set_foreground) (int,int,int);
}
OutputPersonality;
/* Allocates a zero-filled OutputPersonality. */
extern OutputPersonality* op_create(void);
/* Frees the structure only, not the strings it points to. */
extern void op_free (OutputPersonality*);
/* Arguments: personality, character set, character. Returns the
 * translated string, or NULL when the character is outside the range
 * covered by the selected table.
 */
extern char* op_translate_char (OutputPersonality*,int,int);
/* Print the expression that begins / ends a font size change for the
 * given size, falling back to an approximate standard size.
 */
extern void op_begin_std_fontsize (OutputPersonality*, int);
extern void op_end_std_fontsize (OutputPersonality*, int);
#define _OUTPUT
#endif

444
programs/media/unrtf/parse.c Executable file
View File

@ -0,0 +1,444 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: parse
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: Parsing of the RTF file into a structure of Word objects.
*----------------------------------------------------------------------
* Changes:
* 15 Oct 00, tuorfa@yahoo.com: parse.c created with functions taken from word.c
* 15 Oct 00, tuorfa@yahoo.com: backslash before newline is now \par
* 08 Apr 01, tuorfa@yahoo.com: removed limit on word length
* 03 Aug 01, tuorfa@yahoo.com: added input buffering
* 19 Sep 01, tuorfa@yahoo.com: cleaned up read_word()
* 22 Sep 01, tuorfa@yahoo.com: moved word_dump() to word.c
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "defs.h"
#include "parse.h"
#include "malloc.h"
#include "main.h"
#include "error.h"
#include "word.h"
#include "hash.h"
/* local to getchar stuff */
static int ungot_char=-1;
static int ungot_char2=-1;
static int ungot_char3=-1;
/*========================================================================
 * Name:	my_unget_char
 * Purpose:	My own unget routine. The three ungot_char variables form
 *		a small stack: the new character goes on top and the
 *		existing entries each move one slot down. A negative
 *		value marks an empty slot.
 * Args:	Character.
 * Returns:	None.
 *=======================================================================*/
static void my_unget_char (int ch)
{
    int all_slots_used = (ungot_char >= 0)
                      && (ungot_char2 >= 0)
                      && (ungot_char3 >= 0);

    if (all_slots_used)
        error_handler ("more than 3 ungot chars");

    /* Shift the existing entries down, then store the newest on top. */
    ungot_char3 = ungot_char2;
    ungot_char2 = ungot_char;
    ungot_char = ch;
}
static int last_returned_ch=0;
#define READ_BUF_LEN 2048
static int buffer_size = 0;
static char *read_buf = NULL;
static int read_buf_end = 0;
static int read_buf_index = 0;
/*========================================================================
 * Name:	my_getchar
 * Purpose:	Gets a character: either an ungot one, or a buffered one.
 *		Carriage returns are skipped, tabs are returned as spaces,
 *		newlines advance lineno, and a newline directly following
 *		a returned backslash is rewritten as the text "par ".
 * Args:	Input file.
 * Returns:	Character in the range 0..255, or EOF.
 *=======================================================================*/
static int my_getchar (FILE* f)
{
    int ch;

    CHECK_PARAM_NOT_NULL(f);

    /* Pushed-back characters are returned before any file data. */
    if (ungot_char>=0) {
        ch = ungot_char;
        ungot_char = ungot_char2;
        ungot_char2 = ungot_char3;
        ungot_char3 = -1;
        last_returned_ch = ch;
        return ch;
    }

    do {
        /* Refill the buffer once it has been consumed; the buffer
         * itself is allocated on first use, with a smaller size
         * tried if the first request fails.
         */
        if (read_buf_index >= read_buf_end) {
            if (!read_buf) {
                buffer_size = READ_BUF_LEN;
                read_buf = my_malloc (buffer_size);
                if (!read_buf) {
                    buffer_size /= 4;
                    read_buf = my_malloc (buffer_size);
                    if (!read_buf)
                        error_handler ("cannot allocate read buffer");
                }
            }
            read_buf_end = fread (read_buf, 1, buffer_size, f);
            read_buf_index = 0;
            if (!read_buf_end)
                return EOF;
        }

        /* Convert through unsigned char so that bytes >= 0x80 never
         * come out negative: a negative value would be mistaken for
         * EOF (0xFF) or for an empty slot by the unget stack, and is
         * not a valid argument for the <ctype.h> functions.
         */
        ch = (unsigned char) read_buf [read_buf_index++];

        if (ch=='\n') {
            lineno++;
            /* Convert \(newline) into \par here */
            if (last_returned_ch=='\\') {
                my_unget_char (' ');
                my_unget_char ('r');
                my_unget_char ('a');
                ch = 'p';
                break;
            }
        }
    }
    while (ch=='\r' /* || ch=='\n' */ );

    if (ch=='\t') ch = ' ';

    last_returned_ch = ch;
    return ch;
}
/* local to read_word */
static char *input_str = NULL;
static unsigned long current_max_length = 1;
/*========================================================================
 * Name:	expand_word_buffer
 * Purpose:	Doubles the buffer used to store an incoming word: a new
 *		buffer is allocated, the old contents are copied over and
 *		the old buffer is released. This allows us to remove the
 *		limit on word length.
 * Args:	None.
 * Returns:	TRUE.
 *=======================================================================*/
static int
expand_word_buffer ()
{
    unsigned long prev_capacity = current_max_length;
    char *bigger;

    if (!input_str)
        error_handler ("no input buffer allocated");

    current_max_length = prev_capacity * 2;

    bigger = my_malloc (current_max_length);
    if (!bigger)
        error_handler ("out of memory while resizing buffer");

    /* Only the old capacity's worth of bytes can hold valid data. */
    memcpy (bigger, input_str, prev_capacity);

    my_free (input_str);
    input_str = bigger;

    return TRUE;
}
/*========================================================================
 * Name:	read_word
 * Purpose:	The core of the parser, this reads a word. A word is one
 *		of: a single space (standing for a run of spaces), a
 *		brace or semicolon, a backslash expression (two-character
 *		command, \'## hex expression, or control word with an
 *		optional numeric parameter), or a run of ordinary text.
 * Args:	Input file.
 * Returns:	Number of characters in the word, or zero.
 * Note:	The word buffer is static and local to this file. A fresh
 *		buffer is allocated into input_str on every call and is
 *		NOT released here, not even when zero is returned.
 *=======================================================================*/
static int
read_word (FILE *f)
{
    int ch, ch2, ix=0;
    int have_whitespace=FALSE;
    int is_control_word=FALSE;
    int has_numeric_param=FALSE;	/* if is_control_word==TRUE */
    int need_unget=FALSE;

    CHECK_PARAM_NOT_NULL(f);

    current_max_length = 10; /* XX */

    /* Get some storage for a word.
     */
    input_str = my_malloc (current_max_length);
    if (!input_str)
        error_handler("cannot allocate word storage");

    /* Newlines between words are ignored. */
    do {
        ch = my_getchar(f);
    }
    while (ch=='\n');

    if (ch==' ')
    {
        /* Compress multiple space chars down to one.
         */
        while (ch == ' ') {
            ch = my_getchar(f);
            have_whitespace=TRUE;
        }
        if (have_whitespace) {
            my_unget_char (ch);
            input_str[0]=' ';
            input_str[1]=0;
            return 1;
        }
    }

    switch(ch)
    {
    case EOF:
        return 0;

    case '\\':
        ch2 = my_getchar(f);

        /* Look for two-character command words.
         */
        switch (ch2)
        {
        case '\n':
            strcpy (input_str, "\\par");
            return 4;
        case '~':
        case '{':
        case '}':
        case '\\':
        case '_':
        case '-':
            input_str[0] = '\\';
            input_str[1] = ch2;
            input_str[2] = 0;
            return 2;
        case '\'':
            /* Preserve \'## expressions (hex char exprs) for later.
             */
            input_str[0]='\\';
            input_str[1]='\'';
            ix=2;
            if(ix==current_max_length) {
                if (!expand_word_buffer ())
                    error_handler("word too long");
            }
            ch = my_getchar(f);
            input_str[ix++]=ch;
            if(ix==current_max_length) {
                if (!expand_word_buffer ())
                    error_handler("word too long");
            }
            ch = my_getchar(f);
            input_str[ix++]=ch;
            if(ix==current_max_length) {
                if (!expand_word_buffer ())
                    error_handler("word too long");
            }
            input_str[ix]=0;
            return ix;
        }

        /* Any other character after the backslash starts a control
         * word; the main loop below continues with that character.
         */
        is_control_word=TRUE;
        ix=1;
        input_str[0]=ch;
        ch=ch2;
        break;

    case '\t':
        /* In RTF, a tab char is the same as \tab.
         */
        strcpy (input_str, "\\tab");
        return 4;

    case '{':
    case '}':
    case ';':
        input_str[0]=ch;
        input_str[1]=0;
        return 1;
    }

    while (ch!=EOF)
    {
        /* Several chars always ends a word, and we need to save them.
         */
        if (ch=='\t' || ch=='{' || ch=='}' || ch=='\\') {
            need_unget=TRUE;
            break;
        }

        /* A newline always ends a command word; we don't save it.
         * A newline is ignored if this is not a command word.
         */
        if (ch=='\n') {
            if (is_control_word)
                break;
            ch = my_getchar(f);
            continue;
        }

        /* A semicolon always ends a command word; we do save it.
         * A semicolon never ends a regular word.
         */
        if (ch==';') {
            if (is_control_word) {
                need_unget=TRUE;
                break;
            }
        }

        /* In this parser, a space character terminates
         * any word, and if it does not follow a command,
         * then it is a word in itself.
         */
        if (ch==' ') {
            if (!is_control_word)
                need_unget=TRUE;
            break;
        }

        /* Identify a control word's numeric parameter.
         * isdigit() requires a value representable as unsigned char
         * (or EOF), so the character is converted before the call.
         */
        if (is_control_word) {
            if (!has_numeric_param && (isdigit((unsigned char) ch) || ch=='-'))
                has_numeric_param = TRUE;
            else
            if (has_numeric_param && !isdigit((unsigned char) ch)) {
                if (ch!=' ')
                    need_unget=TRUE;
                break;
            }
        }

        input_str[ix++] = ch;
        if (ix==current_max_length) {
            if (!expand_word_buffer ())
                error_handler("word too long");
        }
        ch = my_getchar (f);
    }

    if (need_unget)
        my_unget_char(ch);

    input_str[ix]=0;
    return ix;
}
/*========================================================================
 * Name:	word_read
 * Purpose:	This is the recursive metareader which pieces together the
 *		structure of Word objects. Words read at this level are
 *		linked through their next pointers; a "{" produces a
 *		dummy Word whose child is the list read recursively, and
 *		a "}" or end of input ends the current list.
 * Args:	Input file.
 * Returns:	Tree of Word objects (NULL if no word was read).
 *=======================================================================*/
Word *
word_read (FILE* f) {
    Word *prev_word = NULL;
    Word *first_word = NULL;
    Word *new_word = NULL;	/* temp */

    CHECK_PARAM_NOT_NULL(f);

    do {
        if (!read_word(f)) {
            return first_word;
        }

        if (input_str[0] == '{') {
            /* Create a dummy word to point to a sublist */
            new_word = word_new (NULL);
            if (!new_word)
                error_handler ("cannot allocate word");

            /* Release the buffer holding "{" before recursing: the
             * nested call allocates new buffers into input_str, so
             * this one would otherwise be lost.
             */
            my_free (input_str);
            input_str = NULL;

            /* Get the sublist. On return input_str holds the buffer
             * of the last word the nested call read (the "}" or the
             * end-of-input buffer), which is freed below.
             */
            new_word->child = word_read (f);
        } else if (input_str[0] == '}') {
            return first_word;
        } else {
            new_word = word_new (input_str);
        }

        if (prev_word) prev_word->next = new_word;

        if (!first_word) first_word = new_word;

        prev_word = new_word;

        /* Free up the memory allocated by read_word.
         */
        my_free (input_str);
        input_str = NULL;
    }
    while(1);
}

44
programs/media/unrtf/parse.h Executable file
View File

@ -0,0 +1,44 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: parse.h
* Author name: Zach Smith
* Create date: 15 Oct 2000
* Purpose: Definitions and externs for parse.c.
*----------------------------------------------------------------------
* Changes:
* 15 Oct 00, tuorfa@yahoo.com: parse.h created with functions taken from word.c
*--------------------------------------------------------------------*/
/* Include guard */
#ifndef _PARSE
#define _PARSE

/* FILE is used by the prototype below */
#include <stdio.h>

#ifndef _WORD
#include "word.h"
#endif

/* Reads words from the given input file and returns them as a tree of
 * Word objects.
 */
extern Word *word_read(FILE*);

#endif

610
programs/media/unrtf/ps.c Executable file
View File

@ -0,0 +1,610 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: ps
* Author name: Zach Smith
* Create date: 18 Sep 01
* Purpose: PostScript(TM)-specific output module
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
* 23 Sep 01, tuorfa@yahoo.com: added shadow, outline, strikethru, underline
* 23 Sep 01, tuorfa@yahoo.com: revised PS program to use ISOLatin1Encoding
* 28 Sep 01, tuorfa@yahoo.com: added support for Helvetica,Courier,Symbol
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* Output text for the characters 0x20..0x7f, indexed by (code - 0x20).
 * Parentheses, square brackets and the backslash are emitted with a
 * leading backslash; every other character maps to itself, and the
 * entry for 0x7f is the empty string.
 * The percent sign is written as "%": the former "\%" is not a valid
 * C escape sequence.
 */
static char*
ascii [96] = {
	/* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'",
	/* 0x28 */ "\\(", "\\)", "*", "+", ",", "-", ".", "/",
	/* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7",
	/* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?",
	/* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G",
	/* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O",
	/* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W",
	/* 0x58 */ "X", "Y", "Z", "\\[", "\\\\", "\\]", "^", "_",
	/* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g",
	/* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o",
	/* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w",
	/* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "",
};
/* Octal escapes "\200" .. "\377" for the character codes 0x80..0xff,
 * indexed by (code - 0x80).
 */
static char *ansi[] = {
	/* 0x80-0x87 */ "\\200", "\\201", "\\202", "\\203", "\\204", "\\205", "\\206", "\\207",
	/* 0x88-0x8f */ "\\210", "\\211", "\\212", "\\213", "\\214", "\\215", "\\216", "\\217",
	/* 0x90-0x97 */ "\\220", "\\221", "\\222", "\\223", "\\224", "\\225", "\\226", "\\227",
	/* 0x98-0x9f */ "\\230", "\\231", "\\232", "\\233", "\\234", "\\235", "\\236", "\\237",
	/* 0xa0-0xa7 */ "\\240", "\\241", "\\242", "\\243", "\\244", "\\245", "\\246", "\\247",
	/* 0xa8-0xaf */ "\\250", "\\251", "\\252", "\\253", "\\254", "\\255", "\\256", "\\257",
	/* 0xb0-0xb7 */ "\\260", "\\261", "\\262", "\\263", "\\264", "\\265", "\\266", "\\267",
	/* 0xb8-0xbf */ "\\270", "\\271", "\\272", "\\273", "\\274", "\\275", "\\276", "\\277",
	/* 0xc0-0xc7 */ "\\300", "\\301", "\\302", "\\303", "\\304", "\\305", "\\306", "\\307",
	/* 0xc8-0xcf */ "\\310", "\\311", "\\312", "\\313", "\\314", "\\315", "\\316", "\\317",
	/* 0xd0-0xd7 */ "\\320", "\\321", "\\322", "\\323", "\\324", "\\325", "\\326", "\\327",
	/* 0xd8-0xdf */ "\\330", "\\331", "\\332", "\\333", "\\334", "\\335", "\\336", "\\337",
	/* 0xe0-0xe7 */ "\\340", "\\341", "\\342", "\\343", "\\344", "\\345", "\\346", "\\347",
	/* 0xe8-0xef */ "\\350", "\\351", "\\352", "\\353", "\\354", "\\355", "\\356", "\\357",
	/* 0xf0-0xf7 */ "\\360", "\\361", "\\362", "\\363", "\\364", "\\365", "\\366", "\\367",
	/* 0xf8-0xff */ "\\370", "\\371", "\\372", "\\373", "\\374", "\\375", "\\376", "\\377",
};
/* Stub tables for the Macintosh, CP437 and CP850 character sets:
 * each one holds a single "?" entry.
 */
static char *mac[]   = { "?" };
static char *cp437[] = { "?" };
static char *cp850[] = { "?" };
/* PostScript trailer: shows the last page unless it has already been
 * shown, then ends the file. Percent signs are doubled in this text.
 */
#define PS_END \
	"%% --------- \n\n" \
	"didShowPage not { \n" \
	"showpage \n" \
	"} if\n\n" \
	"%%%%EOF\n"
/* PostScript prologue: header comments, variables, the ISO-Latin-1
 * re-encoded fonts and the operators (F, FS, BR, P, A, B1/B0, ...) used
 * by the strings that ps_init installs.
 *
 * Percent signs are doubled throughout, so the text is presumably passed
 * to printf as a format string -- confirm against the code that prints
 * document_begin. Accordingly:
 *   - the first line is "%%!PS" so that the output starts with "%!PS";
 *   - the two commented-out lines in BR use "%%" like every other comment;
 *   - BR ends "/didShowPage true" with "def" (it used to leave both
 *     operands on the stack without setting the flag).
 *
 * NOTE(review): O1 sets /outline to false exactly like O0, so outlined
 * text is never switched on. This may be deliberate while acharpath is
 * unfinished -- confirm before changing it.
 */
#define PS_START "\
%%!PS\n\
%%--------------------------------------------------------------------------\n\
%% GNU UnRTF, a command-line program to convert RTF documents to other formats.\n\
%% Copyright (C) 2000,2001 Zachary Thayer Smith\n\
%%\n\
%% This program is free software; you can redistribute it and/or modify\n\
%% it under the terms of the GNU General Public License as published by\n\
%% the Free Software Foundation; either version 2 of the License, or\n\
%% (at your option) any later version.\n\
%%\n\
%% This program is distributed in the hope that it will be useful,\n\
%% but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
%% GNU General Public License for more details.\n\
%%\n\
%% You should have received a copy of the GNU General Public License\n\
%% along with this program; if not, write to the Free Software\n\
%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n\
%%\n\
%% The author is reachable by electronic mail at tuorfa@yahoo.com.\n\
%%--------------------------------------------------------------------------\n\
%%%%EndComments \n\
%%\n\
%% --------- Note, this PS code is unfinished -------- \n\
%% --------- Note, this PS code is unfinished -------- \n\
%% --------- Note, this PS code is unfinished -------- \n\
%% --------- Note, this PS code is unfinished -------- \n\
%% --------- Note, this PS code is unfinished -------- \n\
%%\n\
%% ----------- Variables ------------\n\
/fontFamily /Times def\n\
/fontAscent 0 def %% ascent for current font\n\
/fontDescent 0 def %% descent for current font\n\
/lineAscent 0 def \n\
/lineDescent 0 def \n\
/pageWidthInches 8.5 def \n\
/pageHeightInches 11 def \n\
/leftMargin 20 def \n\
/rightMargin 20 def \n\
/topMargin 20 def \n\
/bottomMargin 20 def \n\
/DPI 72 def \n\
/pageWidth pageWidthInches DPI mul def \n\
/rightLimit pageWidth rightMargin sub def \n\
/pageHeight pageHeightInches DPI mul def \n\
/x 0 def \n\
/y 0 def \n\
/bold false def \n\
/italic false def \n\
/underline false def \n\
/overline false def \n\
/intercharSpace 0 def \n\
/strike false def \n\
/outline false def \n\
/shadow false def \n\
/fontSize 12 def \n\
/didBR false def \n\
/didParSkip false def \n\
/didShowPage false def \n\
%%------------------------------------------------------\n\
%% Set up the ISO fonts \n\
%% Times \n\
%% ----- \n\
/Times-Roman findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/TRomanISO exch definefont pop \n\
/Times-Bold findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/TBoldISO exch definefont pop \n\
/Times-BoldItalic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/TBoldItalicISO exch definefont pop \n\
/Times-Italic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/TItalicISO exch definefont pop \n\
%% Courier \n\
%% ----- \n\
/Courier-Roman findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/CRomanISO exch definefont pop \n\
/Courier-Bold findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/CBoldISO exch definefont pop \n\
/Courier-BoldItalic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/CBoldItalicISO exch definefont pop \n\
/Courier-Italic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/CItalicISO exch definefont pop \n\
%% Symbol \n\
%% ----- \n\
/Symbol-Roman findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/SRomanISO exch definefont pop \n\
/Symbol-Bold findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/SBoldISO exch definefont pop \n\
/Symbol-BoldItalic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/SBoldItalicISO exch definefont pop \n\
/Symbol-Italic findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/SItalicISO exch definefont pop \n\
%% Helvetica \n\
%% --------- \n\
/Helvetica-Roman findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/HRomanISO exch definefont pop \n\
/Helvetica-Bold findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/HBoldISO exch definefont pop \n\
/Helvetica-BoldOblique findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/HBoldItalicISO exch definefont pop \n\
/Helvetica-Oblique findfont dup length dict begin { \n\
1 index /FID ne { def } { pop pop } ifelse \n\
} forall \n\
/Encoding ISOLatin1Encoding def \n\
currentdict end \n\
/HItalicISO exch definefont pop \n\
%% \n\
%% Ideally, before we can draw a line of text, we need to collect all the\n\
%% words that will be on it, just as I do in my Beest HTML viewer, as well\n\
%% as character attributes for each word. But for now, this implementation \n\
%% does not bother. It determines the maximize ascent and descent after\n\
%% drawing the text, not before. XX\n\
%% \n\
%% ----------- Functions ------------\n\
/updateFont { \n\
/f0 null def \n\
(Times) fontFamily eq (Times New Roman) fontFamily eq or { bold { \n\
italic { /TBoldItalicISO } { /TBoldISO } ifelse \n\
} { \n\
italic { /TItalicISO } { /TRomanISO } ifelse \n\
} \n\
ifelse \n\
} if (Helvetica) fontFamily eq (Arial) fontFamily eq or { bold { \n\
italic { /HBoldItalicISO } { /HBoldISO } ifelse \n\
} { \n\
italic { /HItalicISO } { /HRomanISO } ifelse \n\
} \n\
ifelse \n\
} if (Courier) fontFamily eq (Courier New) fontFamily eq or { bold { \n\
italic { /CBoldItalicISO } { /CBoldISO } ifelse \n\
} { \n\
italic { /CItalicISO } { /CRomanISO } ifelse \n\
} \n\
ifelse \n\
} if (Symbol) fontFamily eq { bold { \n\
italic { /SBoldItalicISO } { /SBoldISO } ifelse \n\
} { \n\
italic { /SItalicISO } { /SRomanISO } ifelse \n\
} \n\
ifelse \n\
} if findfont /f0 exch def \n\
/bboxBottom f0 /FontBBox get 1 get 1000 div fontSize mul -1 mul def \n\
/bboxTop f0 /FontBBox get 3 get 1000 div fontSize mul def \n\
f0 fontSize scalefont setfont \n\
lineAscent bboxTop lt { /lineAscent bboxTop def } if \n\
lineDescent bboxBottom lt { /lineDescent bboxBottom def } if \n\
/fontAscent bboxTop def \n\
/fontDescent bboxBottom def \n\
} def\n\
/FS { \n\
/fontSize exch def updateFont \n\
} def \n\
/F { \n\
/fontFamily exch def updateFont \n\
} def \n\
/resetX { \n\
/x leftMargin def\n\
} def \n\
/resetY { \n\
/y pageHeight topMargin sub def \n\
} def \n\
/BR { \n\
/oldx x def \n\
/y y lineAscent lineDescent add sub def \n\
resetX \n\
y bottomMargin lt { \n\
showpage \n\
/didShowPage true def \n\
resetY \n\
} if \n\
oldx 0 eq didBR and { /didParSkip true def } if \n\
/didBR true def \n\
%% /lineAscent 0 def \n\
%% /lineDescent 0 def \n\
} def \n\
/P { \n\
didParSkip not { BR } if \n\
didParSkip not { BR } if \n\
} \n\
def \n\
/acharpath { \n\
/acstr exch def pop /acsp exch def newpath str { /ch exch def 1 string 0 ch put false charpath acsp 0 rmoveto } forall} def \n\
/A { \n\
/str exch def \n\
/w str stringwidth pop \n\
str length intercharSpace mul add \n\
def \n\
x w add rightLimit ge { BR } if \n\
x y moveto \n\
outline { \n\
shadow { \n\
1 -0.1 0 { \n\
/offset exch def \n\
offset setgray \n\
x offset 3 mul add y offset 3 mul sub moveto intercharSpace 0 str acharpath \n\
%% str false charpath \n\
fontSize 30 div setlinewidth stroke \n\
} for \n\
0 setgray \n\
} { \n\
intercharSpace 0 str acharpath \n\
%% str false charpath \n\
fontSize 30 div setlinewidth stroke \n\
} ifelse \n\
} { \n\
shadow { \n\
1 -0.1 0 { \n\
/offset exch def \n\
offset setgray \n\
x offset 3 mul add y offset 3 mul sub moveto intercharSpace 0 str ashow \n\
%% str show \n\
} for \n\
0 setgray } { \n\
intercharSpace 0 str ashow \n\
%% str show \n\
} ifelse \n\
} ifelse \n\
strike { \n\
newpath fontSize 20 div setlinewidth \n\
x y fontAscent 0.32 mul add dup /y2 exch def moveto \n\
x w add y2 lineto stroke \n\
} if \n\
underline { \n\
newpath fontSize 20 div setlinewidth \n\
x y fontAscent 0.2 mul sub dup /y2 exch def moveto \n\
x w add y2 lineto stroke \n\
} if \n\
overline { \n\
%% I don't think RTF supports this, but it can be used later. \n\
newpath fontSize 20 div setlinewidth \n\
x y fontAscent 1.2 mul add dup /y2 exch def moveto \n\
x w add y2 lineto stroke \n\
} if \n\
/x x w add def \n\
/didBR false def \n\
/didShowPage false def \n\
} def \n\
\n\
%% These are only binary for now \n\
/X1 { /intercharSpace exch def } def\n\
/X0 { /intercharSpace 0 def } def\n\
/O1 { /outline false def } def\n\
/O0 { /outline false def } def\n\
/H1 { /shadow true def } def\n\
/H0 { /shadow false def } def\n\
/S1 { /strike true def } def\n\
/S0 { /strike false def } def\n\
/B1 { /bold true def updateFont } def\n\
/B0 { /bold false def updateFont } def\n\
/I1 { /italic true def updateFont } def\n\
/I0 { /italic false def updateFont } def\n\
/U1 { /underline true def } def\n\
/U0 { /underline false def } def\n\
updateFont \n\
resetX resetY \n\
\n\
"
/*========================================================================
* Name: ps_init
* Purpose: Generates an OutputPersonality object for the PostScript(TM)
* format.
* Args: None.
* Returns: OutputPersonality.
*=======================================================================*/
OutputPersonality *
ps_init (void)
{
OutputPersonality* op;
op = op_create();
op->comment_begin = "%% ";
op->comment_end = "\n";
op->word_begin = "(";
op->word_end = ")A ";
op->document_begin = PS_START;
op->document_end = PS_END;
op->header_begin = "%% header begin\n";
op->header_end = "%% header end\n";
op->document_title_begin = "%%%%Title: ";
op->document_title_end = "\n";
op->document_author_begin = "%%%%Creator: ";
op->document_author_end = "\n";
op->document_changedate_begin = "%% CHANGED: ";
op->document_changedate_end = "\n";
op->body_begin = "\n\n%% ---------- Document Body ------------\n";
op->body_end = "\n";
op->paragraph_begin = "P ";
op->paragraph_end = "\n";
op->center_begin = "";
op->center_end = "";
op->justify_begin = "";
op->justify_end = "";
op->align_left_begin = "";
op->align_left_end = "";
op->align_right_begin = "";
op->align_right_end = "";
op->forced_space = " ";
op->line_break = "BR\n";
op->page_break = "\n";
op->hyperlink_begin = "U1(";
op->hyperlink_end = ")A U0 ";
op->imagelink_begin = "";
op->imagelink_end = "";
op->table_begin = "\n% TABLE BEGINS (not implemented)\nP\n(TABLE)A BR\n";
op->table_end = "\n% TABLE ENDS (not implemented)\nP\n";
op->table_row_begin = "( )A ";
op->table_row_end = "( |)A BR\n";
op->table_cell_begin = "( | )A ";
op->table_cell_end = "";
/* Character attributes */
op->font_begin = "(%s) F ";
op->font_end = "";
op->fontsize_begin = "%s FS ";
op->fontsize_end = "";
op->smaller_begin = "";
op->smaller_end = "";
op->bigger_begin = "";
op->bigger_end = "";
op->foreground_begin = "";
op->foreground_end = "";
op->background_begin = "";
op->background_end = "";
op->bold_begin = "B1 ";
op->bold_end = "B0 ";
op->italic_begin = "I1 ";
op->italic_end = "I0 ";
op->underline_begin = "U1 ";
op->underline_end = "U0 ";
op->dbl_underline_begin = "U1 ";
op->dbl_underline_end = "U0 ";
op->superscript_begin = "";
op->superscript_end = "";
op->subscript_begin = "";
op->subscript_end = "";
op->strikethru_begin = "S1 ";
op->strikethru_end = "S0 ";
op->dbl_strikethru_begin = "S1 ";
op->dbl_strikethru_end = "S0 ";
op->emboss_begin="";
op->emboss_end = "";
op->engrave_begin = "";
op->engrave_end = "";
op->shadow_begin= "H1 ";
op->shadow_end= "H0 ";
op->outline_begin= "O1 ";
op->outline_end= "O0 ";
op->expand_begin = "%s X1 ";
op->expand_end = "X0 ";
op->simulate_small_caps = TRUE;
op->simulate_all_caps = TRUE;
op->simulate_word_underline = TRUE;
op->ascii_translation_table = ascii;
op->ansi_translation_table = ansi;
op->ansi_first_char = 0x80;
op->ansi_last_char = 0xff;
op->cp437_translation_table = cp437;
op->cp437_first_char = 0x80;
op->cp437_last_char = 0x80;
op->cp850_translation_table = cp850;
op->cp850_first_char = 0x80;
op->cp850_last_char = 0x80;
op->mac_translation_table = mac;
op->mac_first_char = 0x80;
op->mac_last_char = 0x80;
op->chars.right_quote = "'";
op->chars.left_quote = "`";
op->chars.right_dbl_quote = "''";
op->chars.left_dbl_quote = "``";
return op;
}

42
programs/media/unrtf/ps.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: ps
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Definitions for the PostScript(TM) output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Declares ps_init(); _PS marks this header as already included. */
#ifndef _PS
#define _PS
extern OutputPersonality* ps_init(void);
#endif

262
programs/media/unrtf/text.c Executable file
View File

@ -0,0 +1,262 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: text
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Plain text output module
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* Plain-text output for the characters 0x20..0x7f, indexed by
 * (code - 0x20). Every character maps to itself, except 0x7f which
 * maps to the empty string.
 */
static char *ascii_translation_table[96] = {
	/* 0x20-0x27 */ " ", "!", "\"", "#", "$", "%", "&", "'",
	/* 0x28-0x2f */ "(", ")", "*", "+", ",", "-", ".", "/",
	/* 0x30-0x37 */ "0", "1", "2", "3", "4", "5", "6", "7",
	/* 0x38-0x3f */ "8", "9", ":", ";", "<", "=", ">", "?",
	/* 0x40-0x47 */ "@", "A", "B", "C", "D", "E", "F", "G",
	/* 0x48-0x4f */ "H", "I", "J", "K", "L", "M", "N", "O",
	/* 0x50-0x57 */ "P", "Q", "R", "S", "T", "U", "V", "W",
	/* 0x58-0x5f */ "X", "Y", "Z", "[", "\\", "]", "^", "_",
	/* 0x60-0x67 */ "`", "a", "b", "c", "d", "e", "f", "g",
	/* 0x68-0x6f */ "h", "i", "j", "k", "l", "m", "n", "o",
	/* 0x70-0x77 */ "p", "q", "r", "s", "t", "u", "v", "w",
	/* 0x78-0x7f */ "x", "y", "z", "{", "|", "}", "~", "",
};
/* Translation used for all 128 upper character codes: plain text has
 * no representation for them, so every entry is "?".
 */
static char *upper_translation_table[128] = {
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
	"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
};
/*========================================================================
 * Name:	text_init
 * Purpose:	Builds the output personality for the plain text format.
 *		Most markup strings are empty; the document structure is
 *		rendered with newlines, tabs and a few labels.
 * Args:	None.
 * Returns:	OutputPersonality obtained from op_create().
 *=======================================================================*/
OutputPersonality *
text_init (void)
{
	OutputPersonality* op = op_create();

	/* Comments and document frame */
	op->comment_begin = "### ";
	op->comment_end = "\n";
	op->document_begin = "";
	op->document_end = "";
	op->header_begin = "";
	op->header_end = "";
	op->document_title_begin = "\nTITLE: ";
	op->document_title_end = "\n";
	op->document_author_begin = "\nAUTHOR: ";
	op->document_author_end = "\n";
	op->document_changedate_begin = "\nDATE: ";
	op->document_changedate_end = "\n";
	op->body_begin = "\n-----------------\n";
	op->body_end = "";

	/* Paragraphs and alignment */
	op->paragraph_begin = "";
	op->paragraph_end = "\n";
	op->center_begin = "";
	op->center_end = "";
	op->justify_begin = "";
	op->justify_end = "";
	op->align_left_begin = "";
	op->align_left_end = "";
	op->align_right_begin = "";
	op->align_right_end = "";
	op->forced_space = " ";
	op->line_break = "\n";
	op->page_break = "\n";

	/* Links */
	op->hyperlink_begin = "";
	op->hyperlink_end = "";
	op->imagelink_begin = "";
	op->imagelink_end = "";

	/* Tables: one row per line, a tab in front of each cell */
	op->table_begin = "\n";
	op->table_end = "\n";
	op->table_row_begin = "";
	op->table_row_end = "\n";
	op->table_cell_begin = "\t";
	op->table_cell_end = "";

	/* Character attributes */
	op->font_begin = "";
	op->font_end = "";
	op->fontsize_begin = "";
	op->fontsize_end = "";
	op->fontsize8_begin = "";
	op->fontsize8_end = "";
	op->fontsize10_begin = "";
	op->fontsize10_end = "";
	op->fontsize12_begin = "";
	op->fontsize12_end = "";
	op->fontsize14_begin = "";
	op->fontsize14_end = "";
	op->fontsize18_begin = "";
	op->fontsize18_end = "";
	op->fontsize24_begin = "";
	op->fontsize24_end = "";
	op->smaller_begin = "";
	op->smaller_end = "";
	op->bigger_begin = "";
	op->bigger_end = "";
	op->foreground_begin = "";
	op->foreground_end = "";
	op->background_begin = "";
	op->background_end = "";
	op->bold_begin = "";
	op->bold_end = "";
	op->italic_begin = "";
	op->italic_end = "";
	op->underline_begin = "";
	op->underline_end = "";
	op->dbl_underline_begin = "";
	op->dbl_underline_end = "";
	op->superscript_begin = "";
	op->superscript_end = "";
	op->subscript_begin = "";
	op->subscript_end = "";
	op->strikethru_begin = "";
	op->strikethru_end = "";
	op->dbl_strikethru_begin = "";
	op->dbl_strikethru_end = "";
	op->emboss_begin="";
	op->emboss_end = "";
	op->engrave_begin = "";
	op->engrave_end = "";
	op->shadow_begin= "";
	op->shadow_end= "";
	op->outline_begin= "";
	op->outline_end= "";
	op->expand_begin = "";
	op->expand_end = "";

	/* Lists */
	op->pointlist_begin = "\n";
	op->pointlist_end = "\n";
	op->pointlist_item_begin = " * ";
	op->pointlist_item_end = "\n";
	op->numericlist_begin = "\n";
	op->numericlist_end = "\n";
	op->numericlist_item_begin = " # ";
	op->numericlist_item_end = "\n";

	op->simulate_small_caps = TRUE;
	op->simulate_all_caps = TRUE;
	op->simulate_word_underline = TRUE;

	/* Character translation: all four upper-half character sets share
	 * the same table covering 0x80..0xff.
	 */
	op->ascii_translation_table = ascii_translation_table;
	op->ansi_translation_table = upper_translation_table;
	op->ansi_first_char = 0x80;
	op->ansi_last_char = 0xff;
	op->cp437_translation_table = upper_translation_table;
	op->cp437_first_char = 0x80;
	op->cp437_last_char = 0xff;
	op->cp850_translation_table = upper_translation_table;
	op->cp850_first_char = 0x80;
	op->cp850_last_char = 0xff;
	op->mac_translation_table = upper_translation_table;
	op->mac_first_char = 0x80;
	op->mac_last_char = 0xff;

	/* Quotation marks */
	op->chars.right_quote = "'";
	op->chars.left_quote = "`";
	op->chars.right_dbl_quote = "''";
	op->chars.left_dbl_quote = "``";

	return op;
}

42
programs/media/unrtf/text.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: text
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Definitions for the plain text output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Declares text_init(); _TEXT marks this header as already included. */
#ifndef _TEXT
#define _TEXT
extern OutputPersonality* text_init(void);
#endif

64
programs/media/unrtf/util.c Executable file
View File

@ -0,0 +1,64 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: util
* Author name: Zach Smith
* Create date: 01 Aug 01
* Purpose: Utility functions.
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdlib.h>
#include <ctype.h>
/*========================================================================
 * Name:	h2toi
 * Purpose:	Converts a 2-digit hexadecimal value to an unsigned integer.
 * Args:	String whose first two characters are hexadecimal digits
 *		(upper or lower case).
 * Returns:	Integer in the range 0..255, or -1 if the string is NULL
 *		or does not begin with two hexadecimal digits.
 *=======================================================================*/

/* Value (0..15) of one hexadecimal digit, or -1 if c is not one. */
static int
hex_digit_value (char c) {
	/* The <ctype.h> functions need a value representable as
	 * unsigned char; a plain char may be negative.
	 */
	int lc = tolower ((unsigned char) c);

	if (lc >= '0' && lc <= '9') return lc - '0';
	if (lc >= 'a' && lc <= 'f') return lc - 'a' + 10;
	return -1;
}

/* Convert a two-char hexadecimal expression to an integer */
int
h2toi (char *s) {
	int hi;
	int lo;

	if (!s) return -1;

	/* Give up at the first invalid character (this includes the
	 * terminating NUL), so s[1] is never read past the end of a
	 * shorter string.
	 */
	hi = hex_digit_value (s[0]);
	if (hi < 0) return -1;

	lo = hex_digit_value (s[1]);
	if (lo < 0) return -1;

	return 16*hi + lo;
}

34
programs/media/unrtf/util.h Executable file
View File

@ -0,0 +1,34 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: util
* Author name: Zach Smith
* Create date: 1 Aug 2001
* Purpose: Definitions for util module.
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* h2toi: converts a two-character hexadecimal expression (passed as a
 * string) to an integer. */
extern int h2toi (char *);

262
programs/media/unrtf/vt.c Executable file
View File

@ -0,0 +1,262 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: vt
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: text output with VT100 escape codes module
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* Output strings for the 96 character codes 0x20..0x7f, sixteen per row.
 * Every printable character is emitted as itself; the last entry (0x7f)
 * is the empty string. */
static char *ascii_translation_table[96] = {
    /* 0x20 - 0x2f */
    " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
    /* 0x30 - 0x3f */
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
    /* 0x40 - 0x4f */
    "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
    /* 0x50 - 0x5f */
    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
    /* 0x60 - 0x6f */
    "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x70 - 0x7f */
    "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", ""
};
/* Output strings for 128 character codes, sixteen per row: every entry
 * is the placeholder "?". vt_init installs this one table for the ANSI,
 * CP437, CP850 and Mac character sets over the range 0x80..0xff. */
static char *upper_translation_table[128] = {
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
};
/*========================================================================
 * Name:    vt_init
 * Purpose: Builds the output personality used for VT100 text output:
 *          obtains a personality from op_create() and fills in its
 *          markup strings, flags and character translation tables.
 * Args:    None.
 * Returns: OutputPersonality.
 *=======================================================================*/

OutputPersonality *
vt_init (void)
{
    OutputPersonality *op = op_create();

    /* Comments and document framing */
    op->comment_begin = "### ";
    op->comment_end = "\n";

    op->document_begin = "";
    op->document_end = "";

    op->header_begin = "";
    op->header_end = "";

    op->document_title_begin = "\nTITLE: ";
    op->document_title_end = "\n";

    op->document_author_begin = "\nAUTHOR: ";
    op->document_author_end = "\n";

    op->document_changedate_begin = "\nDATE: ";
    op->document_changedate_end = "\n";

    op->body_begin = "\n-----------------\n";
    op->body_end = "";

    /* Paragraphs and alignment */
    op->paragraph_begin = "";
    op->paragraph_end = "\n";

    op->center_begin = "";
    op->center_end = "";

    op->justify_begin = "";
    op->justify_end = "";

    op->align_left_begin = "";
    op->align_left_end = "";

    op->align_right_begin = "";
    op->align_right_end = "";

    /* Spacing and breaks */
    op->forced_space = " ";
    op->line_break = "\n";
    op->page_break = "\n";

    /* Links */
    op->hyperlink_begin = "";
    op->hyperlink_end = "";

    op->imagelink_begin = "";
    op->imagelink_end = "";

    /* Tables: one row per line, each cell preceded by a tab */
    op->table_begin = "\n";
    op->table_end = "\n";

    op->table_row_begin = "";
    op->table_row_end = "\n";

    op->table_cell_begin = "\t";
    op->table_cell_end = "";

    /* Character attributes */
    op->font_begin = "";
    op->font_end = "";

    op->fontsize_begin = "";
    op->fontsize_end = "";

    op->fontsize8_begin = "";
    op->fontsize8_end = "";
    op->fontsize10_begin = "";
    op->fontsize10_end = "";
    op->fontsize12_begin = "";
    op->fontsize12_end = "";
    op->fontsize14_begin = "";
    op->fontsize14_end = "";
    op->fontsize18_begin = "";
    op->fontsize18_end = "";
    op->fontsize24_begin = "";
    op->fontsize24_end = "";

    op->smaller_begin = "";
    op->smaller_end = "";

    op->bigger_begin = "";
    op->bigger_end = "";

    op->foreground_begin = "";
    op->foreground_end = "";

    op->background_begin = "";
    op->background_end = "";

    /* The only attributes that emit escape sequences: ESC[7m for both
     * bold and italic, ESC[4m for underline, ESC[m to end each of them. */
    op->bold_begin = "\033[7m";
    op->bold_end = "\033[m";

    op->italic_begin = "\033[7m";
    op->italic_end = "\033[m";

    op->underline_begin = "\033[4m";
    op->underline_end = "\033[m";

    op->dbl_underline_begin = "";
    op->dbl_underline_end = "";

    op->superscript_begin = "";
    op->superscript_end = "";

    op->subscript_begin = "";
    op->subscript_end = "";

    op->strikethru_begin = "";
    op->strikethru_end = "";

    op->dbl_strikethru_begin = "";
    op->dbl_strikethru_end = "";

    op->emboss_begin = "";
    op->emboss_end = "";

    op->engrave_begin = "";
    op->engrave_end = "";

    op->shadow_begin = "";
    op->shadow_end = "";

    op->outline_begin = "";
    op->outline_end = "";

    op->expand_begin = "";
    op->expand_end = "";

    /* Lists */
    op->pointlist_begin = "\n";
    op->pointlist_end = "\n";
    op->pointlist_item_begin = " * ";
    op->pointlist_item_end = "\n";

    op->numericlist_begin = "\n";
    op->numericlist_end = "\n";
    op->numericlist_item_begin = " # ";
    op->numericlist_item_end = "\n";

    /* Simulation flags */
    op->simulate_small_caps = TRUE;
    op->simulate_all_caps = TRUE;
    op->simulate_word_underline = TRUE;

    /* Character translation: the same upper table serves every
     * character set, each covering 0x80..0xff. */
    op->ascii_translation_table = ascii_translation_table;

    op->ansi_translation_table = upper_translation_table;
    op->ansi_first_char = 0x80;
    op->ansi_last_char = 0xff;

    op->cp437_translation_table = upper_translation_table;
    op->cp437_first_char = 0x80;
    op->cp437_last_char = 0xff;

    op->cp850_translation_table = upper_translation_table;
    op->cp850_first_char = 0x80;
    op->cp850_last_char = 0xff;

    op->mac_translation_table = upper_translation_table;
    op->mac_first_char = 0x80;
    op->mac_last_char = 0xff;

    /* Typographic quotes rendered with plain ASCII */
    op->chars.right_quote = "'";
    op->chars.left_quote = "`";
    op->chars.right_dbl_quote = "''";
    op->chars.left_dbl_quote = "``";

    return op;
}

42
programs/media/unrtf/vt.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: vt
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: Definitions for the VT text output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Guarded declaration of vt_init(): the prototype is emitted only the
 * first time this header is seen (i.e. while _VT is still undefined).
 * vt_init takes no arguments and returns an OutputPersonality pointer.
 * This header includes nothing itself, so OutputPersonality must already
 * be declared by the including file. */
#ifndef _VT
extern OutputPersonality* vt_init(void);
#define _VT
#endif

205
programs/media/unrtf/word.c Executable file
View File

@ -0,0 +1,205 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: word
* Author name: Zach Smith
* Create date: 01 Sep 00
* Purpose: Management of Word objects, which contain strings
* as well as other Words.
*----------------------------------------------------------------------
* Changes:
* 14 Oct 00, tuorfa@yahoo.com: fixed \fs bug (# is 2X the point size).
* 14 Oct 00, tuorfa@yahoo.com: fixed table data printing.
* 14 Oct 00, tuorfa@yahoo.com: protection against null entries in \info
* 14 Oct 00, tuorfa@yahoo.com: fixed printing of <body> again
* 14 Oct 00, tuorfa@yahoo.com: fixed closure of tables
* 15 Oct 00, tuorfa@yahoo.com: fixed font attributes preceding <tr><td>
* 15 Oct 00, tuorfa@yahoo.com: attributes now continue if >1 \cell in group
* 15 Oct 00, tuorfa@yahoo.com: fixed font-size bug, lack of </head>
* 7 Nov 00, tuorfa@yahoo.com: fixed \'## translation bug
* 8 Apr 01, tuorfa@yahoo.com: added check for out of memory after malloc
* 21 Apr 01, tuorfa@yahoo.com: bug fixes regarding author, date
* 21 Apr 01, tuorfa@yahoo.com: added paragraph alignment
* 21 Apr 01, tuorfa@yahoo.com: fix for words getting lost after \par
* 24 Jul 01, tuorfa@yahoo.com: moved conversion code to convert.c
* 22 Sep 01, tuorfa@yahoo.com: moved word_dump to here from parse.c
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "defs.h"
#include "parse.h"
#include "malloc.h"
#include "main.h"
#include "error.h"
#include "word.h"
#include "hash.h"
/* For word_dump: current indentation depth of the dump. word_dump adds 2
 * on entry and subtracts 2 before returning, so nested (recursive) calls
 * print at a deeper level. */
static int indent_level=0;
/*========================================================================
 * Name:    word_string
 * Purpose: Fetches the text of a Word object by looking its hash index
 *          up in the Word hash.
 * Args:    Word*.
 * Returns: String from hash_get_string(), or NULL when the Word's
 *          hash_index is 0.
 *=======================================================================*/

char *
word_string (Word *w) {
    CHECK_PARAM_NOT_NULL(w);

    /* A zero index means the Word carries no string of its own. */
    if (!w->hash_index)
        return NULL;

    return hash_get_string (w->hash_index);
}
/*========================================================================
 * Name:    word_new
 * Purpose: Instantiates a new Word object.
 * Args:    String (may be NULL for a Word without text).
 * Returns: Word* with every field zeroed except hash_index, which holds
 *          the index hash_get_index() returns for the string (0 when no
 *          string was given). If allocation fails, error_handler() is
 *          called; should it return, NULL is returned.
 *=======================================================================*/

Word *
word_new (char *str) {
    Word * w;

    w = (Word *) my_malloc(sizeof(Word));
    if (!w) {
        error_handler ("out of memory");
        /* Only reached if error_handler returns: never hand a NULL
         * pointer to memset below. */
        return NULL;
    }

    /* Clears hash_index, next and child in one go. */
    memset ((void*) w, 0, sizeof(Word));

    /* Without a string, hash_index simply stays 0 from the memset. */
    if (str)
        w->hash_index = hash_get_index (str);

    return w;
}
/*========================================================================
 * Name:    word_free
 * Purpose: Deallocates a Word object together with everything reachable
 *          from it: the rest of its sibling chain (next) and, for each
 *          sibling, the whole child subtree.
 * Args:    Word.
 * Returns: None.
 *=======================================================================*/

void word_free (Word *w) {
    Word *following;

    CHECK_PARAM_NOT_NULL(w);

    for (; w; w = following) {
        /* Children are released recursively, siblings iteratively. */
        if (w->child)
            word_free(w->child);

        /* Remember the successor before this node's memory goes away. */
        following = w->next;
        my_free ((char*) w);
    }
}
/*========================================================================
 * Name:    print_indentation
 * Purpose: Prints padding for the word_dump routine: a separator line
 *          for level 0, otherwise one ". " per two levels (rounded up).
 * Args:    Indentation level.
 * Returns: None.
 *=======================================================================*/

static void
print_indentation (int level)
{
    int column;

    /* Level 0 gets a separator line instead of padding. */
    if (level == 0) {
        printf ("\n-----------------------------------------------------------------------\n\n");
        return;
    }

    /* A negative level never enters the loop and so prints nothing. */
    column = 0;
    while (column < level) {
        printf (". ");
        column += 2;
    }
}
/*========================================================================
 * Name:    word_dump
 * Purpose: Recursive diagnostic routine that prints a tree of words to
 *          standard output. Words with a string are printed quoted on
 *          the current line; a Word without a string has its child list
 *          dumped one indentation step deeper; a Word with neither
 *          triggers a warning.
 * Args:    Word tree.
 * Returns: None.
 *=======================================================================*/

void
word_dump (Word *w)
{
    Word *cur;
    char *text;

    CHECK_PARAM_NOT_NULL(w);

    printf ("\n");
    indent_level += 2;
    print_indentation (indent_level);

    for (cur = w; cur; cur = cur->next) {
        text = word_string (cur);
        if (text) {
            printf ("\"%s\" ", text);
            continue;
        }

        if (!cur->child) {
            warning_handler ("Word object has no string and no children");
            continue;
        }

        /* Nested group: dump it, then resume this level's line. */
        word_dump (cur->child);
        printf ("\n");
        print_indentation (indent_level);
    }

    indent_level -= 2;
}

57
programs/media/unrtf/word.h Executable file
View File

@ -0,0 +1,57 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: word.h
* Author name: Zach Smith
* Create date: 1 Sept 2000
* Purpose: Definitions for Word class.
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
#ifndef _WORD
#define _WORD

/* FILE is needed by the word_read() prototype below; including it here
 * keeps this header usable no matter what the includer pulled in first. */
#include <stdio.h>

/* One node of the word tree.
 *   hash_index  index of the node's string in the Word hash; 0 means the
 *               node has no string of its own.
 *   next        following node in the same list, or NULL at the end.
 *   child       first node of a nested list, or NULL if there is none.
 */
typedef struct _w {
    unsigned long hash_index;
    struct _w * next;
    struct _w * child;
}
Word;

/* Allocates a Word for the given string (which may be NULL). */
extern Word*    word_new (char*);

/* Releases a Word together with its following siblings and all children. */
extern void     word_free (Word*);

/* NOTE(review): presumably reads a word tree from a stream; the
 * definition is not in word.c -- confirm where it lives. */
extern Word*    word_read (FILE*);

/* Returns the string of a Word, or NULL if it has none. */
extern char*    word_string (Word*);

/* Prints a word tree for diagnostic purposes. */
extern void     word_dump (Word*);

/* NOTE(review): the definition is not in word.c -- confirm it still
 * exists before relying on this prototype. */
extern void     word_print_html (Word*);

#endif

270
programs/media/unrtf/wpml.c Executable file
View File

@ -0,0 +1,270 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: wpml
* Author name: Zach Smith
* Create date: 19 Sep 01
* Purpose: WPML output module
* Note: WPML is my own format; it is a work-in-progress
*----------------------------------------------------------------------
* Changes:
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
*--------------------------------------------------------------------*/
#include <stdio.h>
#include <string.h>
#include "malloc.h"
#include "defs.h"
#include "error.h"
#include "main.h"
#include "output.h"
/* Output strings for the 96 character codes 0x20..0x7f, sixteen per row.
 * Every printable character is emitted as itself; the last entry (0x7f)
 * is the empty string. */
static char *ascii_translation_table[96] = {
    /* 0x20 - 0x2f */
    " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
    /* 0x30 - 0x3f */
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?",
    /* 0x40 - 0x4f */
    "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
    /* 0x50 - 0x5f */
    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_",
    /* 0x60 - 0x6f */
    "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
    /* 0x70 - 0x7f */
    "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", ""
};
/* Output strings for 128 character codes, sixteen per row: every entry
 * is the placeholder "?". wpml_init installs this one table for the
 * ANSI, CP437, CP850 and Mac character sets over the range 0x80..0xff. */
static char *upper_translation_table[128] = {
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?",
    "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
};
/*========================================================================
* Name: wpml_init
* Purpose: Generates an output personality for the WPML format.
* Args: None.
* Returns: OutputPersonality.
*=======================================================================*/
OutputPersonality *
wpml_init (void)
{
OutputPersonality* wpml_op;
wpml_op = op_create();
wpml_op->comment_begin = "<!--";
wpml_op->comment_end = "-->\n";
wpml_op->document_begin = "<WPML>";
wpml_op->document_end = "</WPML>";
wpml_op->header_begin = "<HEAD>";
wpml_op->header_end = "</HEAD>";
wpml_op->document_title_begin = "<TITLE>";
wpml_op->document_title_end = "</TITLE>";
wpml_op->document_author_begin = "<AUTHOR>";
wpml_op->document_author_end = "</AUTHOR>\n";
wpml_op->document_changedate_begin = "<DATE>";
wpml_op->document_changedate_end = "</DATE>\n";
wpml_op->body_begin = "\n<BODY>\n";
wpml_op->body_end = "</BODY>";
wpml_op->paragraph_begin = "<LINE>";
wpml_op->paragraph_end = "</LINE>\n";
wpml_op->center_begin = "";
wpml_op->center_end = "";
wpml_op->justify_begin = "";
wpml_op->justify_end = "";
wpml_op->align_left_begin = "";
wpml_op->align_left_end = "";
wpml_op->align_right_begin = "";
wpml_op->align_right_end = "";
wpml_op->forced_space = " ";
wpml_op->line_break = "\n";
wpml_op->page_break = "\n";
wpml_op->hyperlink_begin = "";
wpml_op->hyperlink_end = "";
wpml_op->imagelink_begin = "";
wpml_op->imagelink_end = "";
wpml_op->table_begin = "<TABLE>\n";
wpml_op->table_end = "</TABLE>\n";
wpml_op->table_row_begin = "<TABLEROW>";
wpml_op->table_row_end = "</TABLEROW>\n";
wpml_op->table_cell_begin = "<TABLECELL>";
wpml_op->table_cell_end = "</TABLECELL>";
/* Character attributes */
/* XX: WPML will require that all elements that are now
* character attribute strings be converted to functions,
* so that a complete font description can be written
* each time an attribute begins or ends.
*/
wpml_op->font_begin = "<FONT=\"%s\"/>";
wpml_op->font_end = "";
wpml_op->fontsize_begin = "";
wpml_op->fontsize_end = "";
wpml_op->fontsize8_begin = "";
wpml_op->fontsize8_end = "";
wpml_op->fontsize10_begin = "";
wpml_op->fontsize10_end = "";
wpml_op->fontsize12_begin = "";
wpml_op->fontsize12_end = "";
wpml_op->fontsize14_begin = "";
wpml_op->fontsize14_end = "";
wpml_op->fontsize18_begin = "";
wpml_op->fontsize18_end = "";
wpml_op->fontsize24_begin = "";
wpml_op->fontsize24_end = "";
wpml_op->smaller_begin = "";
wpml_op->smaller_end = "";
wpml_op->bigger_begin = "";
wpml_op->bigger_end = "";
wpml_op->foreground_begin = "";
wpml_op->foreground_end = "";
wpml_op->background_begin = "";
wpml_op->background_end = "";
wpml_op->bold_begin = "";
wpml_op->bold_end = "";
wpml_op->italic_begin = "";
wpml_op->italic_end = "";
wpml_op->underline_begin = "";
wpml_op->underline_end = "";
wpml_op->dbl_underline_begin = "";
wpml_op->dbl_underline_end = "";
wpml_op->superscript_begin = "";
wpml_op->superscript_end = "";
wpml_op->subscript_begin = "";
wpml_op->subscript_end = "";
wpml_op->strikethru_begin = "";
wpml_op->strikethru_end = "";
wpml_op->dbl_strikethru_begin = "";
wpml_op->dbl_strikethru_end = "";
wpml_op->emboss_begin="";
wpml_op->emboss_end = "";
wpml_op->engrave_begin = "";
wpml_op->engrave_end = "";
wpml_op->shadow_begin= "";
wpml_op->shadow_end= "";
wpml_op->outline_begin= "";
wpml_op->outline_end= "";
wpml_op->expand_begin = "";
wpml_op->expand_end = "";
wpml_op->pointlist_begin = "\n";
wpml_op->pointlist_end = "\n";
wpml_op->pointlist_item_begin = "";
wpml_op->pointlist_item_end = "\n";
wpml_op->numericlist_begin = "\n";
wpml_op->numericlist_end = "\n";
wpml_op->numericlist_item_begin = "";
wpml_op->numericlist_item_end = "\n";
wpml_op->simulate_small_caps = TRUE;
wpml_op->simulate_all_caps = TRUE;
wpml_op->simulate_word_underline = TRUE;
wpml_op->ascii_translation_table = ascii_translation_table;
wpml_op->ansi_translation_table = upper_translation_table;
wpml_op->ansi_first_char = 0x80;
wpml_op->ansi_last_char = 0xff;
wpml_op->cp437_translation_table = upper_translation_table;
wpml_op->cp437_first_char = 0x80;
wpml_op->cp437_last_char = 0xff;
wpml_op->cp850_translation_table = upper_translation_table;
wpml_op->cp850_first_char = 0x80;
wpml_op->cp850_last_char = 0xff;
wpml_op->mac_translation_table = upper_translation_table;
wpml_op->mac_first_char = 0x80;
wpml_op->mac_last_char = 0xff;
wpml_op->chars.right_quote = "'";
wpml_op->chars.left_quote = "`";
wpml_op->chars.right_dbl_quote = "''";
wpml_op->chars.left_dbl_quote = "``";
return wpml_op;
}

42
programs/media/unrtf/wpml.h Executable file
View File

@ -0,0 +1,42 @@
/*=============================================================================
GNU UnRTF, a command-line program to convert RTF documents to other formats.
Copyright (C) 2000,2001 Zachary Thayer Smith
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The author is reachable by electronic mail at tuorfa@yahoo.com.
=============================================================================*/
/*----------------------------------------------------------------------
* Module name: wpml
* Author name: Zach Smith
* Create date: 22 Sep 01
* Purpose: Definitions for the WPML output personality
*----------------------------------------------------------------------
* Changes:
*--------------------------------------------------------------------*/
/* Guarded declaration of wpml_init(): the prototype is emitted only the
 * first time this header is seen (i.e. while _WPML is still undefined).
 * wpml_init takes no arguments and returns an OutputPersonality pointer.
 * This header includes nothing itself, so OutputPersonality must already
 * be declared by the including file. */
#ifndef _WPML
extern OutputPersonality* wpml_init(void);
#define _WPML
#endif