An associative container

J

jacob navia

/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
Project: Container library
State: First try
Creation Date: November 2009
Description: This file implements the associative container
"Dictionary", that maps a character string to
some arbitrary data.
Dictionaries can copy their keys and data if
the constructor is called with some element
size. If the constructor is called with
element size zero, no storage will be used to
copy the data, and the dictionary can store
any kind of objects.
If an element size is given, the software
assumes that all data is of that size.
--------------------------------------------------------------------*/


#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* ------------------------------------------------------------
Default settings: NO_C99 NO_GC
---------------------------------------------------------------*/
#define NO_GC
#define NO_C99

#ifdef NO_C99
/* No stdbool */
typedef int bool;
/* No flexible arrays */
#define MINIMUM_ARRAY_INDEX 1
#else
/* Use C99 features */
#define MINIMUM_ARRAY_INDEX
#include <stdbool.h>
#endif

#ifdef UNIX
#define stricmp strcasecmp
#endif

#ifndef NO_GC
/* A garbage collector makes life easier but is not universally available */
#include <gc.h>
#define MALLOC GC_malloc
#define FREE(a)
#else
/* No GC, use the MALLOC/FREE system. */
#define MALLOC malloc
#define FREE free
#endif
/* General container flags */
#define CONTAINER_READONLY 1
/************************************************************************** */
/* */
/* ErrorHandling */
/* */
/************************************************************************** */
#define CONTAINER_ERROR_BADARG (0x80070057)
#define CONTAINER_ERROR_NOMEMORY (0x8007000E)
#define CONTAINER_ERROR_INDEX -3
#define CONTAINER_ERROR_READONLY -4
#define CONTAINER_ERROR_FILEOPEN -5
#define CONTAINER_ERROR_NOTFOUND -6
void ContainerRaiseError(const char *fnname,int errorcode);
void EmptyErrorFunction(const char *fnname,int errcode){}
typedef void (*ErrorFunction)(const char *,int);

/* ----------------------------------Dictionary interface -----------------------*/

typedef struct _Dictionary Dictionary;

typedef struct {
/* Returns the number of elements stored */
size_t (*GetCount)(Dictionary *Dict);

/* Get the flags */
unsigned (*GetFlags)(Dictionary *Dict);

/* Sets th flags */
unsigned (*SetFlags)(Dictionary *Dict,unsigned flags);

/* Adds one element. Given string is copied */
int (*Add)(Dictionary *Dict,unsigned char *key,void *Data);

/* Clears all data and frees the memory */
int (*Clear)(Dictionary *Dict);

/* Returns the string at the given position */
void *(*GetElement)(Dictionary *Dict,unsigned char *Key);

/* Removes the given string if found */
bool (*Remove)(Dictionary *Dict,unsigned char *);

/* Frees the memory used by the dictionary */
int (*Finalize)(Dictionary *Dict);

/* Calls the given function for all strings. "Arg" is a used supplied argument */
/* that can be NULL that is passed to the function to call */
bool (*Apply)(Dictionary *Dict,int (*Applyfn)(unsigned char *Key,void *data,void *arg),void *arg);

/* Set or unset the error function */
ErrorFunction (*SetErrorFunction)(Dictionary *Dict,ErrorFunction fn);


} DictionaryInterface;

struct _Dictionary {
DictionaryInterface *lpVtbl;
size_t size;
unsigned (*hash)(unsigned char *Key);
ErrorFunction RaiseError;
size_t count;
unsigned timestamp;
unsigned Flags;
size_t ElementSize;
struct DataList {
struct DataList *Next;
unsigned char *Key;
void *Value;
} **buckets;
};


Dictionary *newDictionary(size_t hint,size_t ElementSize);


/* -------------------------------- Prototypes ----------------------------------*/
/* Returns the number of elements stored */
static size_t GetCount(Dictionary *Dict);
/* Get the flags */
static unsigned GetFlags(Dictionary *Dict);
/* Sets th flags */
static unsigned SetFlags(Dictionary *Dict,unsigned flags);
/* Adds one element. Given string is copied */
static int Add(Dictionary *Dict,unsigned char *key,void *Data);
/* Clears all data and frees the memory */
static int Clear(Dictionary *Dict);
/* Returns the string at the given position */
static void *GetElement(Dictionary *Dict,unsigned char *Key);
/* Removes the given string if found */
static bool Remove(Dictionary *Dict,unsigned char *);
/* Frees the memory used by the collection */
static int Finalize(Dictionary *Dict);
/* Calls the given function for all strings. "Arg" is a used supplied argument */
/* that can be NULL that is passed to the function to call */
static bool Apply(Dictionary *Dict,int (*Applyfn)(unsigned char *Key,void *,void * arg),void *arg);
/* Set or unset the error function */
static ErrorFunction SetErrorFunction(Dictionary *Dict,ErrorFunction fn);

static DictionaryInterface Interface = {
GetCount,
GetFlags,
SetFlags,
Add,
Clear,
GetElement,
Remove,
Finalize,
Apply,
SetErrorFunction,
};

static unsigned scatter[] = { /* Map characters to random values */
3376649973u, 2288603946u, 1954268477u, 2858154129u, 3254987376u,
1888560329u, 2079711150u, 1249903931u, 2056019508u, 3475721719u,
2183608578u, 1948585191u, 3510366957u, 479341015u, 137912281u,
1856397162u, 701025146u, 3777855647u, 3133726730u, 4113368641u,
251772918u, 2859869442u, 824540103u, 614317204u, 3085688794u,
1104489690u, 3600905459u, 1036657084u, 1960148944u, 2441465117u,
3633092952u, 1202079507u, 1804386472u, 3798190281u, 2511419699u,
1032473403u, 3235883220u, 2593233477u, 2484192352u, 1834174643u,
3630460796u, 3436981729u, 876656665u, 1144446061u, 2179315054u,
2142937421u, 1163901871u, 703364539u, 1635510196u, 1558480853u,
3800782692u, 604753589u, 3558571372u, 274373881u, 183696063u,
4013401969u, 3787387983u, 551169993u, 2706792174u, 475596077u,
784566245u, 2043924368u, 1567342084u, 3331009165u, 150886268u,
596437426u, 2420547845u, 2898343441u, 1643521607u, 1387052253u,
691524517u, 1709282085u, 2105726706u, 326318904u, 2270893751u,
1547094850u, 273913063u, 1180303327u, 1015098316u, 1122706416u,
1025137522u, 1445737386u, 3992079916u, 3230843455u, 3002906788u,
3543652723u, 1755107124u, 1921014418u, 683842306u, 2503306554u,
3688139822u, 3812611237u, 3363198012u, 1643682998u, 285631714u,
1910683492u, 4281003621u, 3709237568u, 2736065042u, 1422760317u,
862182498u, 2248178396u, 3197393735u, 3974531276u, 157092128u,
3859796014u, 851355354u, 2511336234u, 3700246600u, 572627716u,
1519995253u, 342913937u, 328362706u, 3497158594u, 739312110u,
1482159142u, 4059308452u, 1115275813u, 2279798033u, 3563459711u,
102382981u, 698626900u, 2506327534u, 2223405777u, 1827275406u,
159038005u, 4159863896u, 3470995235u, 130302168u, 1077990744u,
1441602901u, 2757433577u, 200115595u, 993264331u, 2598999266u,
3842878136u, 3530540372u, 1361428823u, 248277624u, 1339695154u,
432480863u, 2895143187u, 3166708344u, 2393286685u, 4271569970u,
869342786u, 473223354u, 126812611u, 3904940903u, 1637555894u,
996061127u, 1088298011u, 2952176066u, 2858912209u, 4228613491u,
4236158822u, 2582423590u, 2525024672u, 3677112391u, 3629698756u,
1496034522u, 2081171139u, 2352170546u, 176561938u, 3553901024u,
1142683711u, 2409311685u, 672560988u, 3693784086u, 689665476u,
1992869305u, 2102947696u, 1890679203u, 2387696458u, 1988263978u,
1536664131u, 768867302u, 2456175399u, 3136223828u, 202652382u,
4142812934u, 245277491u, 2630667112u, 240720193u, 2395371056u,
707955862u, 4095017737u, 3236774548u, 3681653056u, 3285235880u,
807411619u, 721125152u, 2671591148u, 4255706610u, 1694083953u,
3615121285u, 2744541524u, 2146568054u, 432941567u, 1070843254u,
2173029527u, 3630977578u, 3297023538u, 77429635u, 4131306785u,
1890732898u, 2010001485u, 1144304337u, 1673699809u, 1335369816u,
3596270401u, 3614930280u, 170584627u, 190006287u, 1491467787u,
821380901u, 196708749u, 986375533u, 3133295550u, 2991205574u,
3983654535u, 3338932148u, 2374084740u, 4292366978u, 3657247497u,
3856158535u, 1497347358u, 3204988225u, 2733738804u, 1120807021u,
450893717u, 2518878143u, 55245244u, 435713941u, 688959256u,
3878081060u, 3828717777u, 2111290183u, 3684667667u, 147090689u,
671188737u, 1379556449u, 1326383789u, 1628432838u, 462410620u,
544713991u, 1591539421u, 2938270133u, 1902128118u, 560215823u,
4293430683u, 1041753686u, 1365246147u, 2681506285u, 500008709u,
1129892475u
};

/*------------------------------------------------------------------------
Procedure: hash ID:1
Purpose: Returns the hash code for a given character string.
The characters are randomized through the
razndomizer table. This is from the source code of
lcc
Input: The character string to hash
Output: The hash code
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

/*------------------------------------------------------------------------
Procedure: newDictionary ID:1
Purpose: Constructs a new dictionary object and initializes
all fields
Input: A hint for the size of the number of elements this
table will hold
Output: A pointer to a newly allocated table
Errors: If no more memory is available returns NULL
------------------------------------------------------------------------*/
Dictionary *newDictionary(size_t hint,size_t elementSize)
{
Dictionary *Dict;
size_t i,allocSiz;
static unsigned primes[] = { 509, 509, 1021, 2053, 4093, 8191, 16381, 32771, 65521, 131071, 0 };
for (i = 1; primes < hint && primes > 0; i++)
;
allocSiz = sizeof (Dictionary);
Dict = MALLOC(allocSiz);
if (Dict == NULL)
return NULL;
memset(Dict,0,allocSiz);
allocSiz = primes[i-1]*sizeof (Dict->buckets[0]);
Dict->buckets = MALLOC(allocSiz);
if (Dict->buckets == NULL) {
FREE(Dict);
return NULL;
}
memset(Dict->buckets,0,allocSiz);
Dict->size = primes[i-1];
Dict->hash = hash;
Dict->lpVtbl = &Interface;
Dict->ElementSize = elementSize;
return Dict;
}
/*------------------------------------------------------------------------
Procedure: GetElement ID:1
Purpose: Returns an element given its key
Input: The dictionary and the key
Output: The object is added, and the number of objects in
the dictionary is returned. Zero or negative error
code if there is an error.
Errors: If the dictionary is read only or the key is NULL an
error code will be returned. Returns zero if there
is no more memory
------------------------------------------------------------------------*/
static void *GetElement(Dictionary *Dict,unsigned char *Key)
{
size_t i;
struct DataList *p;

if (Dict == NULL || Key == NULL)
return NULL;
i = (*Dict->hash)(Key)%Dict->size;
for (p = Dict->buckets; p; p = p->Next)
if (strcmp((char *)Key, (char *)p->Key) == 0)
return p->Value;
return NULL;
}
/*------------------------------------------------------------------------
Procedure: Add ID:1
Purpose: Adds one entry to the dictionary. If another entry
exists for the same key it will be replaced.
Input: The dictionary, the key, and the value to be added
Output: The number of items in the dictionary or a negative
error code
Errors: The container must be read/write.
------------------------------------------------------------------------*/
static int Add(Dictionary *Dict,unsigned char *Key, void *Value)
{
size_t i;
struct DataList *p;
unsigned char *tmp;

if (Dict->Flags & CONTAINER_READONLY)
return CONTAINER_ERROR_READONLY;
if (Key == NULL) {
Dict->RaiseError("Add",CONTAINER_ERROR_BADARG);
return CONTAINER_ERROR_BADARG;
}

i = (*Dict->hash)(Key)%Dict->size;
for (p = Dict->buckets; p; p = p->Next) {
if (strcmp((char *)Key, (char *)p->Key) == 0)
break;
}
if (p == NULL) {
p = MALLOC(sizeof(*p)+Dict->ElementSize);
if (p == NULL)
return 0;
if (Dict->ElementSize) {
p->Value = (void *)(p+1);
memcpy(p->Value,Value,Dict->ElementSize);
}
else p->Value = Value;
tmp = MALLOC(1+strlen((char *)Key));
strcpy((char *)tmp,(char *)Key);
p->Key = tmp;
i = (*Dict->hash)(Key)%Dict->size;
p->Next = Dict->buckets;
Dict->buckets = p;
Dict->count++;
}
else {
if (Dict->ElementSize) {
/* Overwrite the data for an existing element */
memcpy(p->Value,Value,Dict->ElementSize);
}
else p->Value = Value;
}
Dict->timestamp++;
return 1;
}
/*------------------------------------------------------------------------
Procedure: GetCount ID:1
Purpose: Returns the number of entries in the dictionary
Input: The dictionary
Output: The number of entries
Errors: None
------------------------------------------------------------------------*/
static size_t GetCount(Dictionary *Dict)
{
return Dict->count;
}

/*------------------------------------------------------------------------
Procedure: GetFlags ID:1
Purpose: Returns the flags of the given dictionary
Input: The dictionary
Output: The flags
Errors: None
------------------------------------------------------------------------*/
static unsigned GetFlags(Dictionary *Dict)
{
return Dict->Flags;
}

/*------------------------------------------------------------------------
Procedure: SetFlags ID:1
Purpose: Sets the flags field of a dictionary
Input: The dictionary and the flags to set
Output: Returns the old value of the flags
Errors: None
------------------------------------------------------------------------*/
static unsigned SetFlags(Dictionary *Dict,unsigned Flags)
{
unsigned oldFlags = Dict->Flags;
Dict->Flags = Flags;
return oldFlags;
}


/*------------------------------------------------------------------------
Procedure: Apply ID:1
Purpose: Calls the given function for each entry in the
dictionary.
Input: The dictionary, the function to call and an extra
argument to be passed to the called function at each
item.
Output: 1 if no errors, zero otherwise
Errors: If Dictionary is a NULL pointer or the function is a
NULL pointer returns zero.
------------------------------------------------------------------------*/
static bool Apply(Dictionary *Dict,int apply(unsigned char *Key,void *Value, void *ExtraArgs),
void *ExtraArgs)
{
size_t i;
unsigned stamp;
struct DataList *p;

if (Dict == NULL || apply == NULL)
return 0;
stamp = Dict->timestamp;
for (i = 0; i < Dict->size; i++) {
for (p = Dict->buckets; p; p = p->Next) {
apply(p->Key,p->Value, ExtraArgs);
if (Dict->timestamp != stamp)
return 0;
}
}
return 1;
}
/*------------------------------------------------------------------------
Procedure: Remove ID:1
Purpose: Erases an entry from the dictionary if the entry
exists
Input: The dictionary and the key to erase
Output: Returns 1 if the key was found and erased, zero
otherwise.
Errors: If the dictionary pointer is NULL or the key is NULL
returns zero.
------------------------------------------------------------------------*/
static bool Remove(Dictionary *Dict,unsigned char *Key)
{
size_t i;
struct DataList **pp;

if (Dict == NULL || Key == NULL)
return 0;
Dict->timestamp++;
i = (*Dict->hash)(Key)%Dict->size;
for (pp = &Dict->buckets; *pp; pp = &(*pp)->Next)
if (strcmp((char *)Key, (char *)(*pp)->Key) == 0) {
struct DataList *p = *pp;
*pp = p->Next;
FREE(p->Key);
FREE(p);
Dict->count--;
return 1;
}
return 0;
}
/*------------------------------------------------------------------------
Procedure: Clear ID:1
Purpose: Clear all the entries in the given dictionary
Input: The dictionary to clear
Output: 1 if succeeds, zero otherwise
Errors: The dictionary must be read/write
------------------------------------------------------------------------*/
static int Clear(Dictionary *Dict)
{
if (Dict == NULL)
return 0;
if (Dict->Flags & CONTAINER_READONLY)
return 0;
if (Dict->count > 0) {
size_t i;
struct DataList *p, *q;
for (i = 0; i < Dict->size; i++)
for (p = Dict->buckets; p; p = q) {
q = p->Next;
FREE(p->Key);
FREE(p);
}
}
memset(Dict->buckets,0,Dict->size*sizeof(void *));
Dict->count=0;
return 1;
}

/*------------------------------------------------------------------------
Procedure: Finalize ID:1
Purpose: Clears all memory used by a dictionary and destroys
it.
Input: The dictionary to destroy
Output: Returns 1 for success, zero otherwise
Errors: Same as Clear: dictionary must be read/write
------------------------------------------------------------------------*/
static int Finalize(Dictionary *Dict)
{
if (!Clear(Dict))
return 0;
FREE(Dict->buckets);
FREE(Dict);
return 1;
}

static ErrorFunction SetErrorFunction(Dictionary *Dict,ErrorFunction fn)
{
ErrorFunction old;
old = Dict->RaiseError;
Dict->RaiseError = (fn) ? fn : EmptyErrorFunction;
return old;
}

#include <stdio.h>
int main(void)
{
Dictionary *d = newDictionary(100,0);
int data[12],errors=0;
size_t count;
char *p;
int *pi;

data[1] = 1;
data[2] = 2;
d->lpVtbl->Add(d,(unsigned char *)"One",&data[1]);
d->lpVtbl->Add(d,(unsigned char *)"Two",&data[2]);
d->lpVtbl->Add(d,(unsigned char *)"long data","long string");
p = (char *)d->lpVtbl->GetElement(d,(unsigned char *)"long data");
if (strcmp(p,"long string"))
errors++;
pi = (int *)d->lpVtbl->GetElement(d,(unsigned char *)"Two");
if (*pi != 2)
errors++;
pi = (int *)d->lpVtbl->GetElement(d,(unsigned char *)"Two");
if (*pi != 2)
errors++;
count = d->lpVtbl->GetCount(d);
if (count != 3)
errors++;
count=d->lpVtbl->Remove(d,(unsigned char *)"long data");
if (count != 1)
errors++;
count = d->lpVtbl->GetCount(d);
if (count != 2)
errors++;
count=d->lpVtbl->Remove(d,(unsigned char *)"One");
count = d->lpVtbl->GetCount(d);
if (count != 1)
errors++;
if (errors)
printf("%d errors in dictionary\n",errors);
return errors;
}
 
B

Ben Bacarisse

A couple of points:
/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
Project: Container library
State: First try
Creation Date: November 2009
Description: This file implements the associative container
"Dictionary", that maps a character string to
some arbitrary data.

It would be more flexible if keys were any data at all. A string
table could then be written as a specialisation. It would be only a
few more lines.

static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

UB when strlen(str) == 0.

static void *GetElement(Dictionary *Dict,unsigned char *Key)

I think the public interface should use char rather than unsigned
char.
{
size_t i;
struct DataList *p;

if (Dict == NULL || Key == NULL)
return NULL;
i = (*Dict->hash)(Key)%Dict->size;

Unless I've misread the struct, it is clearer to write:

i = Dict->hash(Key) % Dict->size;

static int Add(Dictionary *Dict,unsigned char *Key, void *Value)
{
size_t i;
struct DataList *p;
unsigned char *tmp;

if (Dict->Flags & CONTAINER_READONLY)
return CONTAINER_ERROR_READONLY;
if (Key == NULL) {
Dict->RaiseError("Add",CONTAINER_ERROR_BADARG);
return CONTAINER_ERROR_BADARG;
}

i = (*Dict->hash)(Key)%Dict->size;
for (p = Dict->buckets; p; p = p->Next) {
if (strcmp((char *)Key, (char *)p->Key) == 0)
break;
}
if (p == NULL) {
p = MALLOC(sizeof(*p)+Dict->ElementSize);
if (p == NULL)
return 0;
if (Dict->ElementSize) {
p->Value = (void *)(p+1);


I don't think you can be sure that this pointer is properly aligned
for actual type. p->Value is a void * but the user will access
arbitrary data though it.

<snip>
 
H

Hallvard B Furuseth

jacob said:
static unsigned scatter[] = { /* Map characters to random values */
(...256 values...)
};

/*------------------------------------------------------------------------
Procedure: hash ID:1
(...)
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned

Maybe this illustrates a misunderstandig with char vs unsigned char
in another thread.

Unless that's just a poorly worded comment and you could have said
"you MUST pass a data type which the prototype supports", which is
pretty much implicit in any function definition:
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

No, the function works with unsigned char values, but that's a matter
internal to this function. Any data can safely be accessed through an
unsigned char* pointer to get at the raw representation.

So it works fine (well, almost below) to pass a signed char data do this
function - or signed int, for that matter. You just have to cast the
argument to unsigned char *. If you give the function a void* argument
instead, the function can do the cast instead of bothering the caller
with it.

Except: It does break if a value can have several representations in the
data's actual type. Likely you want all representations to hash to the
same value. For integer/character types it breaks if there are padding
bits and non-two's complement representations, then you'd need to access
the data through its actual type. For character types, my personal
solution is not to worry about it - i.e. not support such architectures.

But then, your hash function also breaks if CHAR_BIT > 8, so it's
written for the common case anyway.
 
J

jacob navia

Ben Bacarisse a écrit :
A couple of points:


It would be more flexible if keys were any data at all. A string
table could then be written as a specialisation. It would be only a
few more lines.

I am working in that generalization, with arbitrary keys and automatic resizing.
That will be a hash table container, not a dictionary. I thought that
having text data as key is a very important application of a hash table.
The interface is simpler since it doesn't need any comparison function
for the keys.
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

UB when strlen(str) == 0.

YES!

I didn't see that one.

Thank you.
I think the public interface should use char rather than unsigned
char.

The problem is the hash function, that needs unsigned char...
Yes, we had that discussion before (See "Problem with gcc"
thread).

Probably you are right.

{
size_t i;
struct DataList *p;

if (Dict == NULL || Key == NULL)
return NULL;
i = (*Dict->hash)(Key)%Dict->size;

Unless I've misread the struct, it is clearer to write:

i = Dict->hash(Key) % Dict->size;

True.

static int Add(Dictionary *Dict,unsigned char *Key, void *Value)
{
size_t i;
struct DataList *p;
unsigned char *tmp;

if (Dict->Flags & CONTAINER_READONLY)
return CONTAINER_ERROR_READONLY;
if (Key == NULL) {
Dict->RaiseError("Add",CONTAINER_ERROR_BADARG);
return CONTAINER_ERROR_BADARG;
}

i = (*Dict->hash)(Key)%Dict->size;
for (p = Dict->buckets; p; p = p->Next) {
if (strcmp((char *)Key, (char *)p->Key) == 0)
break;
}
if (p == NULL) {
p = MALLOC(sizeof(*p)+Dict->ElementSize);
if (p == NULL)
return 0;
if (Dict->ElementSize) {
p->Value = (void *)(p+1);


I don't think you can be sure that this pointer is properly aligned
for actual type. p->Value is a void * but the user will access
arbitrary data though it.

<snip>


I just wanted to avoid a double malloc call...
 
N

Nick

jacob navia said:
#ifdef UNIX
#define stricmp strcasecmp
#endif

What I do here is use a name that doesn't belong in the language space,
and so is unlikely to be used by another system:

#ifdef HAVE_STRCASECMP
#define caseless_strcmp(x,y) strcasecmp(x,y)
#else
#ifdef HAVE_STRICMP
#define caseless_strcmp(x,y) stricmp(x,y)
#else
#error Need to implement caseless string comparison here
#endif
#endif

The "HAVE_" macros come from autoconf.

This way people who know popular variants of C don't get confused and
think "hang on, what's this Windows function doing here?". They come
across a name they don't know, but which is fairly obvious, and can look
it up if necessary.
 
K

Keith Thompson

Nick said:
What I do here is use a name that doesn't belong in the language space,
and so is unlikely to be used by another system:

#ifdef HAVE_STRCASECMP
#define caseless_strcmp(x,y) strcasecmp(x,y)
#else
#ifdef HAVE_STRICMP
#define caseless_strcmp(x,y) stricmp(x,y)
#else
#error Need to implement caseless string comparison here
#endif
#endif

The "HAVE_" macros come from autoconf.

This way people who know popular variants of C don't get confused and
think "hang on, what's this Windows function doing here?". They come
across a name they don't know, but which is fairly obvious, and can look
it up if necessary.

Or you can just delete those three lines, since there are no further
references to either stricmp or strcasecmp in the rest of the article.
 
P

Phil Carmody

jacob navia said:
/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
[SNIP - crap code]

Not a const in sight. You are pitifully ill-equipped to be writing
C. Take up knitting instead, perhaps.

Phil
 
B

Ben Pfaff

jacob navia said:
/*------------------------------------------------------------------------
Procedure: hash ID:1
Purpose: Returns the hash code for a given character string.
The characters are randomized through the
razndomizer table. This is from the source code of
lcc
Input: The character string to hash
Output: The hash code
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

Why will this function not work for signed characters?
 
B

Ben Pfaff

Phil Carmody said:
jacob navia said:
/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
[SNIP - crap code]

Not a const in sight. You are pitifully ill-equipped to be writing
C. Take up knitting instead, perhaps.

It's a "first try" according to the comments. In my own code, at
least, I often don't worry about const on the first try, and then
I go back later and sprinkle it in as appropriate.
 
J

jacob navia

Phil Carmody a écrit :
jacob navia said:
/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
[SNIP - crap code]

Not a const in sight. You are pitifully ill-equipped to be writing
C. Take up knitting instead, perhaps.

Phil
Sure, when you learn to use a kill file!

I was in your kill file, you said a thousand times, boasting around...

Apparently you still didn't get it.
 
J

jacob navia

Ben Pfaff a écrit :
jacob navia said:
/*------------------------------------------------------------------------
Procedure: hash ID:1
Purpose: Returns the hash code for a given character string.
The characters are randomized through the
razndomizer table. This is from the source code of
lcc
Input: The character string to hash
Output: The hash code
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

Why will this function not work for signed characters?

because if the character is negative, the indexing of the array will not work
OK.
 
J

jacob navia

Ben Pfaff a écrit :
Phil Carmody said:
jacob navia said:
/*------------------------------------------------------------------------
Module: dictionary.c
Author: jacob
[SNIP - crap code]

Not a const in sight. You are pitifully ill-equipped to be writing
C. Take up knitting instead, perhaps.

It's a "first try" according to the comments. In my own code, at
least, I often don't worry about const on the first try, and then
I go back later and sprinkle it in as appropriate.

You can't get all the details in the first version. The objective
here is to see that the code is working, and doing what it should.

I consider "const correctness" belongs in the "polishing phase". It is
important, I agree, but not now.
 
B

Ben Pfaff

jacob navia said:
Ben Pfaff a écrit :
jacob navia said:
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

Why will this function not work for signed characters?

because if the character is negative, the indexing of the array will not work
OK.

But you access the characters as "unsigned char". How can there
be a problem, then?
 
R

Richard Tobin

because if the character is negative, the indexing of the array will not work
OK.

If the function is intended to work with ordinary character strings,
it would make sense to use char * in the interface, and cast the
characters to unsigned char before using them as integers.

-- Richard
 
J

jacob navia

Ben Pfaff a écrit :
jacob navia said:
Ben Pfaff a écrit :
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}
Why will this function not work for signed characters?
because if the character is negative, the indexing of the array will not work
OK.

But you access the characters as "unsigned char". How can there
be a problem, then?

No, in the code shown there are no problems. The comments
specifies that if you would change the type of the argument
to just char pointer it would not work. It is a misunderstanding,;
maybe I should change the comment to make it more explicit.
 
B

Ben Bacarisse

jacob navia said:
Ben Pfaff a écrit :
jacob navia said:
Ben Pfaff a écrit :

Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str)
{
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}
Why will this function not work for signed characters?
because if the character is negative, the indexing of the array will not work
OK.

But you access the characters as "unsigned char". How can there
be a problem, then?

No, in the code shown there are no problems. The comments
specifies that if you would change the type of the argument
to just char pointer it would not work. It is a misunderstanding,;
maybe I should change the comment to make it more explicit.

But then you'd have to comment almost every function you write with
"don't change the type of this argument". Most readers will accept
that changing the type of one or more parameters might have serious
consequences. It is usually left unsaid.
 
M

Moi

jacob navia said:
Ben Pfaff a écrit :
Errors: None. Note that this function will NOT work for
signed characters. The input MUST be unsigned
------------------------------------------------------------------------*/
static unsigned hash(unsigned char *str) {
unsigned int h = scatter[*str];
str++;
while(*str) {
h = (h >> 1) ^ scatter[*str++];
}
return h;
}

Why will this function not work for signed characters?

because if the character is negative, the indexing of the array will
not work OK.

But you access the characters as "unsigned char". How can there be a
problem, then?


The (unsigned char *) argument is strange.
It will force you to cast the arguments to this function *on every call*
An easier way would be to use a (void*) argument and cast it to
(unsigned char*) *inside the function*

For hashfunctions I normally use the form

****/
unsigned hash_da_thing(void * ptr, size_t siz)
{
unsigned hash;
unsigned char * cp= ptr;

if (!siz) siz = strlen(cp);

for(hash=0 ; siz>0; )
{ ... }
....
return hash;
}

****/


; this will allow non-string to be hashed, by providing a non-null size;
(plus: add some "const" ;-)

I think the Zobrist hashing is awkward, too.

HTH,
AvK
 
J

jacob navia

Ben Bacarisse a écrit :
But then you'd have to comment almost every function you write with
"don't change the type of this argument". Most readers will accept
that changing the type of one or more parameters might have serious
consequences. It is usually left unsaid.

Yeah, but after the discussion about signed/unsigned chars in this group
I thought that text should be always defined as char, and changed all places where
text was used into char, and in all those changes I forgot that place!

After some time in the debugger I discover my error, and I was frustrated
with all this problems. I added then the comment.

Actually, I should redo all the changes and change all the key data to
char *, and cast HERE the character into unsigned char. But I did not
feel like redoing all those changes a second time so it will stay like
this (at least in the near future)
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,744
Messages
2,569,484
Members
44,903
Latest member
orderPeak8CBDGummies

Latest Threads

Top