simple_hashtable/shash.c

175 lines
5.4 KiB
C

// This file contains the implementation of the functions used by the shash-hashtable library and is part of the shash library
// Copyright (C) 2023 theboring_XOR
// This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "shash.h"
static char *str_duplicate(char *str, size_t len)
{
char *new_str = malloc(len * sizeof(char));
if(new_str == NULL) return 0;
memcpy(new_str, str, len);
return new_str;
}
static int get_empty_hashtable_slot(shash_hashtable_t *hashtable)
{
assert(hashtable != NULL);
for (unsigned int i = 0; i < hashtable->table_size; i++)
{
if (hashtable->hash_table[i].key == 0)
return i;
}
// The hashtable is full
return -1;
}
int shash_init_hashtable(shash_hashtable_t *hashtable, unsigned int table_size)
{
// Initialize the RNG to a non-constant seed, to make the output less pseudo random
srand(time(NULL));
// Create a transformation table
hashtable->transformation_table = malloc((CHAR_MAX - CHAR_MIN) * sizeof(int));
if (hashtable->transformation_table == NULL)
{
return EXIT_FAILURE;
}
// assign random values to it
for (int i = 0; i < CHAR_MAX - CHAR_MIN; i++)
{
hashtable->transformation_table[i] = TRANSFORM_TABLE_MAX_RAND * rand() / RAND_MAX;
}
// Create the hash_table
hashtable->hash_table = malloc(table_size * sizeof(shash_table_element_t));
if (hashtable->hash_table == NULL)
{
return EXIT_FAILURE;
}
memset(hashtable->hash_table, 0, table_size * sizeof(shash_table_element_t));
hashtable->table_size = table_size;
return EXIT_SUCCESS;
}
unsigned int shash_hash(char *key, unsigned int len, shash_hashtable_t *hashtable)
{
assert(hashtable != NULL);
if (SIMULATE_COLLISIONS == 1)
{
return SIMULATED_COLLISION_HASH;
}
// Slight variation of cyclic polynomial hasing, as described in the Paper: "Recursive Hashing functions for n-Grams" by J. D. Cohen
unsigned int hash_word = 0;
for (unsigned int i = 0; i < len; i++)
{
hash_word = rot32_left(hash_word, 1);
hash_word = hash_word ^ hashtable->transformation_table[(unsigned int)key[i]];
}
return hash_word % hashtable->table_size;
}
static bool is_key_in_slot(shash_table_element_t slot, char *key, unsigned int len)
{
if(slot.keylen != len)
{
return 0;
}
unsigned int longer_key_lenght = slot.keylen > len ? slot.keylen : len;
if(strncmp(slot.key, key, longer_key_lenght) == 0)
{
return true;
}
return false;
}
int shash_set(char *key, unsigned int len, void *data, shash_hashtable_t *hashtable)
{
assert(key != NULL);
assert(data != NULL);
assert(hashtable != NULL);
unsigned int slot = shash_hash(key, len, hashtable);
// Loop to the end of the linked list
while (hashtable->hash_table[slot].encountered_collision != 0 && !is_key_in_slot(hashtable->hash_table[slot], key, len))
{
slot = hashtable->hash_table[slot].next_key_location;
}
shash_table_element_t table_element =
{
.key = str_duplicate(key, len),
.keylen = len,
.data = data};
// If there is no element already in the slot, we can just use it
if (hashtable->hash_table[slot].key == 0 || is_key_in_slot(hashtable->hash_table[slot], key, len))
{
hashtable->hash_table[slot] = table_element;
return EXIT_SUCCESS;
}
// If not, we need to handle the collision
else
{
int empty_slot = get_empty_hashtable_slot(hashtable);
if (empty_slot != -1)
{
hashtable->hash_table[slot].encountered_collision = 1;
hashtable->hash_table[slot].next_key_location = empty_slot;
hashtable->hash_table[empty_slot] = table_element;
return EXIT_SUCCESS;
}
}
// hashtable full
return EXIT_FAILURE;
}
void *shash_get(char *key, unsigned int len, shash_hashtable_t *hashtable)
{
assert(key != NULL);
assert(hashtable != NULL);
unsigned int slot = shash_hash(key, len, hashtable);
// Itereate through the link list until we find the right element
while (!is_key_in_slot(hashtable->hash_table[slot], key, len))
{
if (hashtable->hash_table[slot].encountered_collision == 1)
{
slot = hashtable->hash_table[slot].next_key_location;
}
else
{
/* Invalid key
this return value cannot be identified as an error from outside, TODO: fix */
return NULL;
}
}
return hashtable->hash_table[slot].data;
}
void shash_destroy_hashtable(shash_hashtable_t *hashtable)
{
assert(hashtable != 0);
for (unsigned int i = 0; i < hashtable->table_size; i++)
{
if (hashtable->hash_table[i].key != NULL)
{
free(hashtable->hash_table[i].key);
}
}
free(hashtable->transformation_table);
free(hashtable->hash_table);
}