198 lines
5.0 KiB
C
198 lines
5.0 KiB
C
|
|
||
|
/*=============================================================================
|
||
|
GNU UnRTF, a command-line program to convert RTF documents to other formats.
|
||
|
Copyright (C) 2000,2001 Zachary Thayer Smith
|
||
|
|
||
|
This program is free software; you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation; either version 2 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program; if not, write to the Free Software
|
||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
||
|
The author is reachable by electronic mail at tuorfa@yahoo.com.
|
||
|
=============================================================================*/
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------
|
||
|
* Module name: hash
|
||
|
* Author name: Zach Smith
|
||
|
* Create date: 01 Sep 00
|
||
|
* Purpose: Word-hash management. Words are put into a hash and an
|
||
|
* identifier is returned. This is used to save us from
|
||
|
* doing multiple mallocs for recurring strings such as
|
||
|
* 'the' and \par. This is not a big issue under Unix,
|
||
|
* but it is under other OSes and anyway, waste not want not.
|
||
|
*----------------------------------------------------------------------
|
||
|
* Changes:
|
||
|
* 08 Apr 01, tuorfa@yahoo.com: check for out of memory after malloc.
|
||
|
 * 21 Apr 01, tuorfa@yahoo.com: signed-to-unsigned conversion bug
|
||
|
* 03 Aug 01, tuorfa@yahoo.com: fixes for using 16-bit compiler
|
||
|
* 22 Sep 01, tuorfa@yahoo.com: added function-level comment blocks
|
||
|
*--------------------------------------------------------------------*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "error.h"
|
||
|
#include "main.h"
|
||
|
#include "malloc.h"
|
||
|
|
||
|
|
||
|
/* One node of a bucket's singly linked chain of stored words. */
typedef struct _hi {
	struct _hi *next;	/* following node in the same bucket, or NULL */
	char *str;		/* text of the stored word */
	unsigned long value;	/* identifier returned to callers for this word */
} HashItem;
|
||
|
|
||
|
|
||
|
/* Index by first char of string */
|
||
|
static HashItem *hash[256];
|
||
|
static unsigned long hash_length[256];
|
||
|
static unsigned long hash_value=0;
|
||
|
|
||
|
|
||
|
|
||
|
/*========================================================================
|
||
|
* Name: hash_init
|
||
|
* Purpose: Clear the hash table.
|
||
|
* Args: None.
|
||
|
* Returns: None.
|
||
|
*=======================================================================*/
|
||
|
|
||
|
void
|
||
|
hash_init ()
|
||
|
{
|
||
|
int i;
|
||
|
for (i=0; i<256; i++) {
|
||
|
hash[i]=NULL;
|
||
|
hash_length[i]=0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*========================================================================
|
||
|
* Name: hash_stats
|
||
|
* Purpose: Prints to stderr the number of words stored.
|
||
|
* Args: None.
|
||
|
* Returns: None.
|
||
|
*=======================================================================*/
|
||
|
|
||
|
void
|
||
|
hash_stats ()
|
||
|
{
|
||
|
int i;
|
||
|
unsigned long total=0;
|
||
|
for (i=0; i<256; i++) {
|
||
|
total += hash_length[i];
|
||
|
}
|
||
|
fprintf (stderr,"%lu words were hashed.\n", total);
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*========================================================================
|
||
|
* Name: hashitem_new
|
||
|
* Purpose: Creates a new linked list item for the hash table.
|
||
|
* Args: String.
|
||
|
* Returns: HashItem.
|
||
|
*=======================================================================*/
|
||
|
|
||
|
static HashItem *
|
||
|
hashitem_new (char *str)
|
||
|
{
|
||
|
HashItem *hi;
|
||
|
unsigned long i;
|
||
|
|
||
|
hi=(HashItem*) my_malloc(sizeof(HashItem));
|
||
|
if (!hi)
|
||
|
error_handler ("out of memory");
|
||
|
memset ((void*)hi, 0, sizeof (HashItem));
|
||
|
|
||
|
hi->str = my_strdup(str);
|
||
|
|
||
|
i = *str;
|
||
|
if (i=='\\') i=str[1];
|
||
|
i <<= 24;
|
||
|
hi->value = i | (hash_value++ & 0xffffff);
|
||
|
hi->next = NULL;
|
||
|
|
||
|
#if 0
|
||
|
if (debug_mode) {
|
||
|
printf ("<!-- storing val %08lx str %s -->\n",
|
||
|
hi->value, hi->str);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
return hi;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*========================================================================
|
||
|
* Name: hash_get_index
|
||
|
* Purpose: Given a string, returns the "index" i.e. the word identifier.
|
||
|
* Args: String.
|
||
|
* Returns: Index.
|
||
|
*=======================================================================*/
|
||
|
|
||
|
unsigned long
|
||
|
hash_get_index (char *str)
|
||
|
{
|
||
|
unsigned short index;
|
||
|
HashItem *hi;
|
||
|
char ch;
|
||
|
|
||
|
ch = *str;
|
||
|
if (ch=='\\' && *(str+1))
|
||
|
ch = *(str+1);
|
||
|
index = (unsigned) ch;
|
||
|
hi = hash[index];
|
||
|
while (hi) {
|
||
|
if (!strcmp(hi->str,str))
|
||
|
return hi->value;
|
||
|
hi=hi->next;
|
||
|
}
|
||
|
/* not in hash */
|
||
|
hi = hashitem_new (str);
|
||
|
hi->next = hash[index];
|
||
|
hash [index] = hi;
|
||
|
++hash_length [index];
|
||
|
return hi->value;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*========================================================================
|
||
|
* Name: hash_get_string
|
||
|
* Purpose: Given the index (word identifier) returns the word string.
|
||
|
* Args: Index.
|
||
|
* Returns: String, or NULL if not found.
|
||
|
*=======================================================================*/
|
||
|
|
||
|
char*
|
||
|
hash_get_string (unsigned long value)
|
||
|
{
|
||
|
int index;
|
||
|
HashItem *hi;
|
||
|
index = value >> 24;
|
||
|
hi = hash[index];
|
||
|
while (hi) {
|
||
|
if (hi->value == value)
|
||
|
return hi->str;
|
||
|
hi=hi->next;
|
||
|
}
|
||
|
warning_handler ("word not in hash");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
|