// 207 lines, 5.7 KiB, C
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
// Presumably the width of the hash word in bits; not referenced by the code visible in this file
#define WORD 32
// Scale factor (2^32) that rand() / RAND_MAX is multiplied with when the transformation table is filled
#define TRANSFORM_TABLE_MAX_RAND 4294967296
// Not referenced by the code visible in this file
#define DELTA 1

// If set to 1, every key is hashed to SIMULATED_COLLISION_HASH, which forces all keys into one chain
#define SIMULATE_COLLISIONS 0
#define SIMULATED_COLLISION_HASH 20
// rot32_left(x, y): rotates the unsigned 32-bit value x to the left by y bits.
#ifdef __clang__
// The rotate is a compiler builtin and needs no declaration; the header is
// only pulled in where the toolchain actually ships it.
#if __has_include(<builtins.h>)
#include <builtins.h>
#endif
#define rot32_left(x, y) __builtin_rotateleft32((x), (y))
#else
// Portable fallback. Every argument and the whole expansion are parenthesized
// so the macro can be used inside larger expressions, and both shift counts
// are masked to 0..31 so that y == 0 does not shift by the full width (which
// would be undefined behaviour). Note that x and y are evaluated twice.
#define rot32_left(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
// One slot of the hash table array
typedef struct
{
    // Heap-allocated copy of the key, released when the table is destroyed; NULL marks the slot as unused
    char *key;
    // Value pointer exactly as handed in by the caller; the table neither copies nor frees it
    void *data;

    // Gets set to 1 once another key has been chained behind this element
    // (uint8_t from <stdint.h> instead of the non-standard BSD name u_int8_t)
    uint8_t encountered_collision;

    // Only meaningful when encountered_collision is set: index in the hash table array of the next element of the chain
    int next_key_location;
} shash_table_element_t;
// Contains everything the functions for the hashtables need, to work with, including the hash table itself
|
|
typedef struct
|
|
{
|
|
unsigned int *transformation_table;
|
|
shash_table_element_t *hash_table;
|
|
unsigned int table_size;
|
|
} shash_hashtable_t;
// Returns -1 when the hashtable is full
|
|
int get_empty_hashtable_slot(shash_hashtable_t *hashtable)
|
|
{
|
|
assert(hashtable != NULL);
|
|
|
|
for (unsigned int i = 0; i < hashtable->table_size; i++)
|
|
{
|
|
if (hashtable->hash_table[i].key == 0)
|
|
return i;
|
|
}
|
|
// The hashtable is full
|
|
return -1;
|
|
}
// Prepares `hashtable` for use: seeds the RNG, fills a freshly allocated
// transformation table with random values and allocates a zeroed slot array
// with `table_size` entries.
//
// Returns EXIT_SUCCESS on success. Returns EXIT_FAILURE when table_size is 0
// or an allocation fails; in that case nothing stays allocated and the struct
// is left with NULL pointers and a table_size of 0.
int shash_init_hashtable(shash_hashtable_t *hashtable, unsigned int table_size)
{
    assert(hashtable != NULL);

    // Start from a well-defined empty state so every failure path below leaves the struct consistent
    hashtable->transformation_table = NULL;
    hashtable->hash_table = NULL;
    hashtable->table_size = 0;

    // The hash is reduced modulo table_size, so a table without slots cannot work
    if (table_size == 0)
    {
        return EXIT_FAILURE;
    }

    // Seed the RNG with the current time so the transformation table differs between runs
    srand((unsigned int)time(NULL));

    // One entry per possible byte value. CHAR_MAX - CHAR_MIN is one short of
    // that (255 instead of 256 for 8-bit chars), which left the highest byte
    // value without an entry.
    const size_t transformation_entries = (size_t)UCHAR_MAX + 1;
    unsigned int *transformation_table = malloc(transformation_entries * sizeof *transformation_table);
    if (transformation_table == NULL)
    {
        return EXIT_FAILURE;
    }
    for (size_t i = 0; i < transformation_entries; i++)
    {
        // The product is computed in the (wider) type of TRANSFORM_TABLE_MAX_RAND and then narrowed on purpose
        transformation_table[i] = (unsigned int)(TRANSFORM_TABLE_MAX_RAND * rand() / RAND_MAX);
    }

    // calloc zero-fills the slots (null key == unused slot) and checks the size multiplication for overflow
    shash_table_element_t *slots = calloc(table_size, sizeof *slots);
    if (slots == NULL)
    {
        // Do not leak the transformation table when the second allocation fails
        free(transformation_table);
        return EXIT_FAILURE;
    }

    hashtable->transformation_table = transformation_table;
    hashtable->hash_table = slots;
    hashtable->table_size = table_size;
    return EXIT_SUCCESS;
}
// Hashes the first `len` bytes of `key` and returns a slot index in the range
// [0, hashtable->table_size).
//
// Slight variation of cyclic polynomial hashing, as described in the paper
// "Recursive Hashing Functions for n-Grams" by J. D. Cohen: for every byte the
// hash word is rotated left by one bit and XORed with the byte's entry in the
// transformation table.
//
// When SIMULATE_COLLISIONS is 1, every key maps to SIMULATED_COLLISION_HASH
// (reduced modulo the table size).
unsigned int shash_hash(char *key, unsigned int len, shash_hashtable_t *hashtable)
{
    assert(key != NULL || len == 0);
    assert(hashtable != NULL);
    // A table without slots would make the modulo below a division by zero
    assert(hashtable->table_size != 0);

    if (SIMULATE_COLLISIONS == 1)
    {
        // Reduced so the forced hash stays a valid index in tables with 20 or fewer slots
        return SIMULATED_COLLISION_HASH % hashtable->table_size;
    }

    // The transformation table is assumed to hold at least CHAR_MAX - CHAR_MIN
    // entries (the minimum shash_init_hashtable allocates); indices are folded
    // into that range so the lookup can never leave the table.
    const unsigned int table_entries = CHAR_MAX - CHAR_MIN;

    unsigned int hash_word = 0;
    for (unsigned int i = 0; i < len; i++)
    {
        // Kept as a statement of its own so it is safe regardless of how rot32_left parenthesizes its expansion
        hash_word = rot32_left(hash_word, 1);

        // Go through unsigned char: a plain (possibly negative) char converted
        // straight to unsigned int would yield a huge out-of-bounds index
        const unsigned int table_index = (unsigned char)key[i] % table_entries;
        hash_word ^= hashtable->transformation_table[table_index];
    }

    return hash_word % hashtable->table_size;
}
// Stores `data` under the key given by the first `len` bytes of `key` (cut at
// the first NUL byte, if there is one within `len`).
//
// The key is copied into the table; `data` is stored as a plain pointer and
// stays owned by the caller. If the key is already present, its data pointer
// is replaced in place. Otherwise the element goes into the key's home slot
// or, when that slot is taken, into the first unused slot of the array, which
// is then linked to the end of the chain starting at the home slot.
//
// Returns EXIT_SUCCESS when the value was stored, EXIT_FAILURE when the
// hashtable is full or the key copy could not be allocated.
int shash_set(char *key, unsigned int len, void *data, shash_hashtable_t *hashtable)
{
    assert(key != NULL);
    assert(data != NULL);
    assert(hashtable != NULL);

    // Effective key length: at most len bytes, ending at the first NUL byte
    const char *terminator = memchr(key, '\0', len);
    const size_t key_len = (terminator != NULL) ? (size_t)(terminator - key) : len;

    unsigned int slot = shash_hash(key, len, hashtable);

    // Walk the chain; stop at an unused home slot or at the last chain element
    for (;;)
    {
        shash_table_element_t *element = &hashtable->hash_table[slot];

        if (element->key == NULL)
        {
            break;
        }
        // Same key already stored: update it in place instead of adding a
        // duplicate (which would also overwrite this element's chain link)
        if (strncmp(element->key, key, key_len) == 0 && element->key[key_len] == '\0')
        {
            element->data = data;
            return EXIT_SUCCESS;
        }
        if (element->encountered_collision == 0)
        {
            break;
        }
        slot = (unsigned int)element->next_key_location;
    }

    // Pick the destination before allocating anything, so that a full table cannot leak the key copy
    const int append_to_chain = (hashtable->hash_table[slot].key != NULL);
    unsigned int target = slot;
    if (append_to_chain)
    {
        const int empty_slot = get_empty_hashtable_slot(hashtable);
        if (empty_slot == -1)
        {
            // hashtable full
            return EXIT_FAILURE;
        }
        target = (unsigned int)empty_slot;
    }

    char *key_copy = malloc(key_len + 1);
    if (key_copy == NULL)
    {
        return EXIT_FAILURE;
    }
    memcpy(key_copy, key, key_len);
    key_copy[key_len] = '\0';

    if (append_to_chain)
    {
        // `slot` is the last element of the chain; hook the new element up behind it
        hashtable->hash_table[slot].encountered_collision = 1;
        hashtable->hash_table[slot].next_key_location = (int)target;
    }

    // The remaining fields are zero-initialized: the new element ends the chain
    const shash_table_element_t table_element = {.key = key_copy, .data = data};
    hashtable->hash_table[target] = table_element;
    return EXIT_SUCCESS;
}
// Looks up the key given by the first `len` bytes of `key` (cut at the first
// NUL byte, if there is one within `len`) and returns the data pointer stored
// for it.
//
// Returns NULL when the key is not in the table. shash_set asserts that data
// is non-NULL, so a NULL result is not expected for a key that is present.
void *shash_get(char *key, unsigned int len, shash_hashtable_t *hashtable)
{
    assert(key != NULL);
    assert(hashtable != NULL);

    // Effective key length: at most len bytes, ending at the first NUL byte
    const char *terminator = memchr(key, '\0', len);
    const size_t key_len = (terminator != NULL) ? (size_t)(terminator - key) : len;

    unsigned int slot = shash_hash(key, len, hashtable);

    // Iterate through the chain until the right element is found
    for (;;)
    {
        const shash_table_element_t *element = &hashtable->hash_table[slot];

        // Unused slot: nothing was ever stored here, and there is no key string to compare against
        if (element->key == NULL)
        {
            return NULL;
        }
        if (strncmp(element->key, key, key_len) == 0 && element->key[key_len] == '\0')
        {
            return element->data;
        }
        // End of the chain reached without a match: the key is not stored
        if (element->encountered_collision == 0)
        {
            return NULL;
        }
        slot = (unsigned int)element->next_key_location;
    }
}
// Releases everything the hashtable owns: the key copies, the slot array and
// the transformation table. The data pointers stored in the table are not
// freed. The struct itself is not freed; its fields are reset so that a
// second call is harmless.
void shash_destroy_hashtable(shash_hashtable_t *hashtable)
{
    assert(hashtable != NULL);

    // No slot array (e.g. already destroyed): there are no keys to release
    if (hashtable->hash_table != NULL)
    {
        for (unsigned int i = 0; i < hashtable->table_size; i++)
        {
            // free(NULL) is a no-op, so unused slots need no special casing
            free(hashtable->hash_table[i].key);
        }
    }
    free(hashtable->transformation_table);
    free(hashtable->hash_table);

    // Do not leave dangling pointers behind
    hashtable->transformation_table = NULL;
    hashtable->hash_table = NULL;
    hashtable->table_size = 0;
}
int main(void)
|
|
{
|
|
// Initialize an empty hashtable
|
|
shash_hashtable_t hashtable;
|
|
shash_init_hashtable(&hashtable, 100);
|
|
|
|
// Store some data
|
|
shash_set("FOO", 3, "Hello", &hashtable);
|
|
shash_set("BAR", 3, "World!", &hashtable);
|
|
|
|
// And retrieve it
|
|
char *retrieved_foo = shash_get("FOO", 3, &hashtable);
|
|
char *retrieved_bar = shash_get("BAR", 3, &hashtable);
|
|
printf("%s, %s\n", retrieved_foo, retrieved_bar);
|
|
|
|
// Destroy the hashtable
|
|
shash_destroy_hashtable(&hashtable);
|
|
|
|
return 0;
|
|
}