/*============================================================================= GNU UnRTF, a command-line program to convert RTF documents to other formats. Copyright (C) 2000,2001 Zachary Thayer Smith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA The author is reachable by electronic mail at tuorfa@yahoo.com. =============================================================================*/ /*---------------------------------------------------------------------- * Module name: html * Author name: Zach Smith * Create date: 18 Sep 01 * Purpose: HTML-specific output module *---------------------------------------------------------------------- * Changes: * 01 Aug 01, tuorfa@yahoo.com: code moved over from convert.c * 03 Aug 01, tuorfa@yahoo.com: removed null entries to save space * 08 Aug 01, tuorfa@yahoo.com, gommer@gmx.net: fixed/added some ANSI chars * 18 Sep 01, tuorfa@yahoo.com: moved character sets into html.c etc * 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks *--------------------------------------------------------------------*/ #include <stdio.h> #include <string.h> #include "malloc.h" #include "defs.h" #include "error.h" #include "main.h" #include "output.h" static char* ascii [96] = { /* 0x20 */ " ", "!", "\"", "#", "$", "%", "&", "'", /* 0x28 */ "(", ")", "*", "+", ",", "-", ".", "/", /* 0x30 */ "0", "1", "2", "3", "4", "5", "6", "7", /* 0x38 */ "8", "9", ":", ";", "<", "=", ">", "?", /* 0x40 */ "@", "A", "B", "C", "D", "E", "F", "G", /* 0x48 */ "H", "I", "J", "K", "L", "M", "N", "O", /* 0x50 */ "P", "Q", "R", "S", "T", "U", "V", "W", /* 0x58 */ "X", "Y", "Z", "[", "\\", "]", "^", "_", /* 0x60 */ "`", "a", "b", "c", "d", "e", "f", "g", /* 0x68 */ "h", "i", "j", "k", "l", "m", "n", "o", /* 0x70 */ "p", "q", "r", "s", "t", "u", "v", "w", /* 0x78 */ "x", "y", "z", "{", "|", "}", "~", "", }; static char* ansi [] = { /* 0x82 */ "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ", NULL, NULL, NULL, /* 0x90 */ NULL,"`","'","``","''","•","–","—", /* 0x98 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, /* 0xa0 */ " ","¡","¢","£","¤","¥","¦","§", /* 0xa8 */ "¨","©","ª","«","¬","­","®","¯", /* 0xb0 */ "°", "±","²","³","´","µ","¶","·", /* 0xb8 */ "¸","¹", "º","»", "¼", "½","¾","¿", /* 0xc0 */ "À","Á","Â","Ã","Ä","Å","Æ","Ç", /* 0xc8 */ "È","É","Ê","Ë","Ì","Í","Î","Ï", /* 0xd0 */ "Ð","Ñ","Ò","Ó","Ô","Õ","Ö","×", /* 0xd8 */ "Ø","Ù","Ú","Û","Ü","Ý","Þ","ß", /* 0xe0 */ "à","á","â","ã","ä","å","æ","ç", /* 0xe8 */ "è","é","ê","ë","ì","í","î","ï", /* 0xf0 */ "ð","ñ","ò","ó","ô","õ","ö","÷", /* 0xf8 */ "ø","ù","ú","û","ü","ý","þ","ÿ", }; static char* mac [] = { /* 0xa4 */ "•", NULL,NULL,NULL,NULL,NULL,NULL,NULL, /* 0xb0 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, /* 0xc0 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, /* 0xd0 */ "—","–","“","”","&lquo;","&rquo;", }; static char* cp437 [] = { /* 0x80 */ "ç", /* 0x81 */ "ü", /* 0x82 */ "é", /* 0x83 */ "â", /* 0x84 */ "ä", /* 0x85 */ "à", /* 0x86 */ "å", /* 0x87 */ "ç", /* 0x88 */ "ê", /* 0x89 */ "ë", /* 0x8a */ "è", /* 0x8b */ "ï", /* 0x8c */ "î", /* 0x8d */ "ì", /* 0x8e */ "ä", /* 0x8f */ "å", /* 0x90 */ "é", /* 0x91 */ "æ", /* 0x92 */ "æ", /* 0x93 */ "ô", /* 0x94 */ "ö", /* 0x95 */ "ò", /* 0x96 */ "û", /* 0x97 */ "ù", /* 0x98 */ "ÿ", /* 0x99 */ "ö", /* 0x9a */ "ü", /* 0x9b */ "¢", /* 0x9c */ "£", /* 0x9d */ "¥", /* 0x9e */ "₧", /* peseta */ /* 0x9f */ "ƒ", /* small f with hook */ /* 0xa0 */ "á", /* 0xa1 */ "í", /* 0xa2 */ "ó", /* 0xa3 */ "ú", /* 0xa4 */ "ñ", /* 0xa5 */ "ñ", /* 0xa6 */ "ª", /* 0xa7 */ "¼", /* 0xa8 */ "¿", /* 0xa9 */ "⌐", /* reversed not */ /* 0xaa */ "¬", /* 0xab */ "½", /* 0xac */ "»", /* 0xad */ "¡", /* 0xae */ "«", /* 0xaf */ "º", /* 0xb0 */ "░", /* light shade */ /* 0xb1 */ "▒", /* med. shade */ /* 0xb2 */ "▓", /* dark shade */ /* 0xb3 */ "│", /* box-draw light vert. */ /* 0xb4 */ "┤", /* box-draw light vert. + lt. */ /* 0xb5 */ "╡", /* box-draw vert. sgl. + lt. dbl. */ /* 0xb6 */ "╢", /* box-draw vert. dbl. + lt. sgl. */ /* 0xb7 */ "╖", /* box-draw dn. dbl. + lt. sgl. */ /* 0xb8 */ "╕", /* box-draw dn. sgl. + lt. dbl. */ /* 0xb9 */ "╣", /* box-draw dbl. vert. + lt. */ /* 0xba */ "║", /* box-draw dbl. vert. */ /* 0xbb */ "╗", /* box-draw dbl. dn. + lt. */ /* 0xbc */ "╝", /* box-draw dbl. up + lt. */ /* 0xbd */ "╜", /* box-draw up dbl. + lt. sgl. */ /* 0xbe */ "╛", /* box-draw up sgl. + lt. dbl. */ /* 0xbf */ "┐", /* box-draw light dn. + lt. */ /* 0xc0 */ "└", /* box-draw light up + rt. */ /* 0xc1 */ "┴", /* box-draw light up + horiz. */ /* 0xc2 */ "┬", /* box-draw light dn. + horiz. */ /* 0xc3 */ "├", /* box-draw light vert. + rt. */ /* 0xc4 */ "─", /* box-draw light horiz. */ /* 0xc5 */ "┼", /* box-draw light vert. + horiz. */ /* 0xc6 */ "╞", /* box-draw vert. sgl. + rt. dbl. */ /* 0xc7 */ "╟", /* box-draw vert. dbl. + rt. sgl. */ /* 0xc8 */ "╚", /* box-draw dbl. up + rt. */ /* 0xc9 */ "╔", /* box-draw dbl. dn. + rt. */ /* 0xca */ "╩", /* box-draw dbl. up + horiz. */ /* 0xcb */ "╦", /* box-draw dbl. dn. + horiz. */ /* 0xcc */ "╠", /* box-draw dbl. vert. + rt. */ /* 0xcd */ "═", /* box-draw dbl. horiz. */ /* 0xce */ "╬", /* box-draw dbl. vert. + horiz. */ /* 0xcf */ "╧", /* box-draw up sgl. + horiz. dbl. */ /* 0xd0 */ "╨", /* box-draw up dbl. + horiz. sgl. */ /* 0xd1 */ "╤", /* box-draw dn. sgl. + horiz. dbl. */ /* 0xd2 */ "╥", /* box-draw dn. dbl. + horiz. sgl. */ /* 0xd3 */ "╙", /* box-draw up dbl. + rt. sgl. */ /* 0xd4 */ "╘", /* box-draw up sgl. + rt. dbl. */ /* 0xd5 */ "╒", /* box-draw dn. sgl. + rt. dbl. */ /* 0xd6 */ "╓", /* box-draw dn. dbl. + rt. sgl. */ /* 0xd7 */ "╫", /* box-draw vert. dbl. + horiz. sgl. */ /* 0xd8 */ "╪", /* box-draw vert. sgl. + horiz. dbl. */ /* 0xd9 */ "┘", /* box-draw light up + lt. */ /* 0xda */ "┌", /* box-draw light dn. + rt. */ /* 0xdb */ "█", /* full block */ /* 0xdc */ "▄", /* lower 1/2 block */ /* 0xdd */ "▌", /* lt. 1/2 block */ /* 0xde */ "▐", /* rt. 1/2 block */ /* 0xdf */ "▀", /* upper 1/2 block */ /* 0xe0 */ "α", /* greek small alpha */ /* 0xe1 */ "ß", /* 0xe2 */ "Γ", /* greek cap gamma */ /* 0xe3 */ "π", /* greek small pi */ /* 0xe4 */ "Σ", /* greek cap sigma */ /* 0xe5 */ "σ", /* greek small sigma */ /* 0xe6 */ "µ", /* 0xe7 */ "τ", /* greek small tau */ /* 0xe8 */ "Φ", /* greek cap phi */ /* 0xe9 */ "Θ", /* greek cap theta */ /* 0xea */ "Ω", /* greek cap omega */ /* 0xeb */ "δ", /* greek small delta */ /* 0xec */ "∞", /* inf. */ /* 0xed */ "φ", /* greek small phi */ /* 0xee */ "ε", /* greek small epsilon */ /* 0xef */ "∩", /* intersect */ /* 0xf0 */ "≡", /* identical */ /* 0xf1 */ "±", /* 0xf2 */ "≥", /* greater-than or equal to */ /* 0xf3 */ "≤", /* less-than or equal to */ /* 0xf4 */ "⌠", /* top 1/2 integral */ /* 0xf5 */ "⌡", /* bottom 1/2 integral */ /* 0xf6 */ "÷", /* 0xf7 */ "≈", /* almost = */ /* 0xf8 */ "+", /* 0xf9 */ "∙", /* bullet op */ /* 0xfa */ "·", /* 0xfb */ "√", /* sqrt */ /* 0xfc */ "ⁿ", /* super-script small n */ /* 0xfd */ "²", /* 0xfe */ "■", /* black square */ /* 0xff */ " ", }; static char* cp850 [] = { /* 0x80 */ "ç", /* 0x81 */ "ü", /* 0x82 */ "é", /* 0x83 */ "â", /* 0x84 */ "ä", /* 0x85 */ "à", /* 0x86 */ "å", /* 0x87 */ "ç", /* 0x88 */ "ê", /* 0x89 */ "ë", /* 0x8a */ "è", /* 0x8b */ "ï", /* 0x8c */ "î", /* 0x8d */ "ì", /* 0x8e */ "ä", /* 0x8f */ "å", /* 0x90 */ "é", /* 0x91 */ "æ", /* 0x92 */ "æ", /* 0x93 */ "ô", /* 0x94 */ "ö", /* 0x95 */ "ò", /* 0x96 */ "û", /* 0x97 */ "ù", /* 0x98 */ "ÿ", /* 0x99 */ "ö", /* 0x9a */ "ü", /* 0x9b */ "ø", /* 0x9c */ "£", /* 0x9d */ "ø", /* 0x9e */ "×", /* 0x9f */ "ƒ", /* small f with hook */ /* 0xa0 */ "á", /* 0xa1 */ "í", /* 0xa2 */ "ó", /* 0xa3 */ "ú", /* 0xa4 */ "ñ", /* 0xa5 */ "ñ", /* 0xa6 */ "ª", /* 0xa7 */ "¼", /* 0xa8 */ "¿", /* 0xa9 */ "®", /* 0xaa */ "¬", /* 0xab */ "½", /* 0xac */ "»", /* 0xad */ "¡", /* 0xae */ "«", /* 0xaf */ "º", /* 0xb0 */ "░", /* light shade */ /* 0xb1 */ "▒", /* med. shade */ /* 0xb2 */ "▓", /* dark shade */ /* 0xb3 */ "│", /* box-draw light vert. */ /* 0xb4 */ "┤", /* box-draw light vert. + lt. */ /* 0xb5 */ "á", /* 0xb6 */ "â", /* 0xb7 */ "à", /* 0xb8 */ "©", /* 0xb9 */ "╣", /* box-draw dbl. vert. + lt. */ /* 0xba */ "║", /* box-draw dbl. vert. */ /* 0xbb */ "╗", /* box-draw dbl. dn. + lt. */ /* 0xbc */ "╝", /* box-draw dbl. up + lt. */ /* 0xbd */ "¢", /* 0xbe */ "¥", /* 0xbf */ "┐", /* box-draw light dn. + lt. */ /* 0xc0 */ "└", /* box-draw light up + rt. */ /* 0xc1 */ "┴", /* box-draw light up + horiz. */ /* 0xc2 */ "┬", /* box-draw light dn. + horiz. */ /* 0xc3 */ "├", /* box-draw light vert. + rt. */ /* 0xc4 */ "─", /* box-draw light horiz. */ /* 0xc5 */ "┼", /* box-draw light vert. + horiz. */ /* 0xc6 */ "ã", /* 0xc7 */ "ã", /* 0xc8 */ "╚", /* box-draw dbl. up + rt. */ /* 0xc9 */ "╔", /* box-draw dbl. dn. + rt. */ /* 0xca */ "╩", /* box-draw dbl. up + horiz. */ /* 0xcb */ "╦", /* box-draw dbl. dn. + horiz. */ /* 0xcc */ "╠", /* box-draw dbl. vert. + rt. */ /* 0xcd */ "═", /* box-draw dbl. horiz. */ /* 0xce */ "╬", /* box-draw dbl. vert. + horiz. */ /* 0xcf */ "¤", /* 0xd0 */ "ð", /* 0xd1 */ "ð", /* 0xd2 */ "ê", /* 0xd3 */ "ë", /* 0xd4 */ "è", /* 0xd5 */ "ı", /* small dotless i */ /* 0xd6 */ "í", /* 0xd7 */ "î", /* 0xd8 */ "ï", /* 0xd9 */ "┘", /* box-draw light up + lt. */ /* 0xda */ "┌", /* box-draw light dn. + rt. */ /* 0xdb */ "█", /* full-block */ /* 0xdc */ "▄", /* lower 1/2 block */ /* 0xdd */ "¦", /* 0xde */ "ì", /* 0xdf */ "▀", /* upper 1/2 block */ /* 0xe0 */ "ó", /* 0xe1 */ "ß", /* 0xe2 */ "ô", /* 0xe3 */ "ò", /* 0xe4 */ "õ", /* 0xe5 */ "õ", /* 0xe6 */ "µ", /* 0xe7 */ "þ", /* 0xe8 */ "þ", /* 0xe9 */ "ú", /* 0xea */ "û", /* 0xeb */ "ù", /* 0xec */ "ý", /* 0xed */ "ý", /* 0xee */ "¯", /* 0xef */ "´", /* 0xf0 */ "­", /* 0xf1 */ "±", /* 0xf2 */ "‗", /* dbl. lowline */ /* 0xf3 */ "¾", /* 0xf4 */ "¶", /* 0xf5 */ "§", /* 0xf6 */ "÷", /* 0xf7 */ "¸", /* 0xf8 */ "+", /* 0xf9 */ "¨", /* 0xfa */ "·", /* 0xfb */ "¹", /* 0xfc */ "³", /* 0xfd */ "²", /* 0xfe */ "■", /* black square */ /* 0xff */ " ", }; /*======================================================================== * Name: html_init * Purpose: Generates the HTML output personality. * Args: None. * Returns: OutputPersonality. *=======================================================================*/ OutputPersonality * html_init (void) { OutputPersonality* op; op = op_create(); op->comment_begin = "<!--- "; op->comment_end = " --->\n"; op->document_begin = "<html>\n"; op->document_end = "</html>\n"; op->header_begin = "<head>\n"; op->header_end = "</head>\n"; op->document_title_begin = "<title>"; op->document_title_end = "</title>\n"; op->document_author_begin = "<!--author: "; op->document_author_end = "--->\n"; op->document_changedate_begin = "<!--changed: "; op->document_changedate_end = "--->\n"; op->body_begin = "<body>"; op->body_end = "</body>\n"; op->paragraph_begin = "<p>"; op->paragraph_end = "</p>\n"; op->center_begin = "<center>"; op->center_end = "</center>\n"; op->justify_begin = "<div align=justify>\n"; op->justify_end = "</div>\n"; op->align_left_begin = "<div align=left>\n"; op->align_left_end = "</div>\n"; op->align_right_begin = "<div align=right>\n"; op->align_right_end = "</div>\n"; op->forced_space = " "; op->line_break = "<br>\n"; op->page_break = "<p><hr><p>\n"; op->hyperlink_begin = "<a href=\""; op->hyperlink_end = "\">hyperlink</a>"; op->imagelink_begin = "<img src=\""; op->imagelink_end = "\">"; op->table_begin = "<table border=2>\n"; op->table_end = "</table>\n"; op->table_row_begin = "<tr>"; op->table_row_end = "</tr>\n"; op->table_cell_begin = "<td>"; op->table_cell_end = "</td>\n"; /* Character attributes */ op->font_begin = "<font face=\"%s\">"; op->font_end = "</font>"; op->fontsize_begin = "<span style=\"font-size:%spt\">"; op->fontsize_end = "</span>"; op->fontsize8_begin = "<font size=1>"; op->fontsize8_end = "</font>"; op->fontsize10_begin = "<font size=2>"; op->fontsize10_end = "</font>"; op->fontsize12_begin = "<font size=3>"; op->fontsize12_end = "</font>"; op->fontsize14_begin = "<font size=4>"; op->fontsize14_end = "</font>"; op->fontsize18_begin = "<font size=5>"; op->fontsize18_end = "</font>"; op->fontsize24_begin = "<font size=6>"; op->fontsize24_end = "</font>"; op->smaller_begin = "<small>"; op->smaller_end = "</small>"; op->bigger_begin = "<big>"; op->bigger_end = "</big>"; op->foreground_begin = "<font color=\"%s\">"; op->foreground_end = "</font>"; op->background_begin = "<span style=\"background:%s\">"; op->background_end = "</span>"; op->bold_begin = "<b>"; op->bold_end = "</b>"; op->italic_begin = "<i>"; op->italic_end = "</i>"; op->underline_begin = "<u>"; op->underline_end = "</u>"; op->dbl_underline_begin = "<u>"; op->dbl_underline_end = "</u>"; op->superscript_begin = "<sup>"; op->superscript_end = "</sup>"; op->subscript_begin = "<sub>"; op->subscript_end = "</sub>"; op->strikethru_begin = "<s>"; op->strikethru_end = "</s>"; op->dbl_strikethru_begin = "<s>"; op->dbl_strikethru_end = "</s>"; op->emboss_begin="<span style=\"background:gray\"><font color=black>"; op->emboss_end = "</font></span>"; op->engrave_begin = "<span style=\"background:gray\"><font color=navyblue>"; op->engrave_end = "</font></span>"; op->shadow_begin= "<span style=\"background:gray\">"; op->shadow_end= "</span>"; op->outline_begin= "<span style=\"background:gray\">"; op->outline_end= "</span>"; op->expand_begin = "<span style=\"letter-spacing: %s\">"; op->expand_end = "</span>"; op->pointlist_begin = "<ol>\n"; op->pointlist_end = "</ol>\n"; op->pointlist_item_begin = "<li>"; op->pointlist_item_end = "</li>\n"; op->numericlist_begin = "<ul>\n"; op->numericlist_end = "</ul>\n"; op->numericlist_item_begin = "<li>"; op->numericlist_item_end = "</li>\n"; op->simulate_small_caps = TRUE; op->simulate_all_caps = TRUE; op->simulate_word_underline = TRUE; op->ascii_translation_table = ascii; op->ansi_translation_table = ansi; op->ansi_first_char = 0x82; op->ansi_last_char = 0xff; op->cp437_translation_table = cp437; op->cp437_first_char = 0x80; op->cp437_last_char = 0xff; op->cp850_translation_table = cp850; op->cp850_first_char = 0x80; op->cp850_last_char = 0xff; op->mac_translation_table = mac; op->mac_first_char = 0xa4; op->mac_last_char = 0xd5; op->chars.right_quote = "'"; op->chars.left_quote = "`"; op->chars.right_dbl_quote = "''"; op->chars.left_dbl_quote = "``"; return op; }