JetCracker

Life-time learner's blog

Hash Table: C implementation

When we studied data structures at my university, we were given a task to implement the one we wanted, using any programming language we liked (C, C++ or Java). I chose C because it is close to operating system and hardware. Writing in C makes you understand how your programme works. Each line of code is valuable and may effect your application performance positively or negatively. The data structure I chose to implement was Hash Table

Here is my implementation of Hash Table, which I  wrote more than year ago for my IT class.

chain_hash.h

/**
 * Hash Table with chains (header file)
 * Serves elements in raw binary arrays. Provides basic routines such, as: get, set, remove and find.
 * The idea comes from the book
 * of D. Knut. Theory and practice of programming. Sorting and Searching.
 * All memory is allocated dynamicaly. See chain_hash_test.c for manual.
 * @file chain_hash.h
 * @author Anton Danshin, 915 group, (c) MIPT - 2011
 * @date 18 Feb 2011, 13:00 MSK
 */

#ifndef CHAIN_HASH_H

#include <stdlib.h>
#include <stdio.h>
#include <memory.h>

#define CHAIN_HASH_H

/**
 * Default Hash Table capacity (maximum number of elements)
 */
#define CHAIN_HASH_TABLE_CAPACITY 100003

/**
 * Creates new instance of Hash Table with specified <code>capacity</code>
 * or set it to default value (if <code>capacity==0</code>)
 * @param capacity 0 if you want it to be default <code>CHAIN_HASH_TABLE_CAPACITY</code>
 * @return Descriptor of new hash table instance or -1 in case of error
 */
int ch_create(unsigned int capacity);

/**
 * Removes Hash Table with specified descriptor <code>id</code>
 * and frees all allocated memory
 * @param id Hash Table descriptor
 * @return 0 on success or -1 in case of error
 */
int ch_free(const unsigned int id);

/**
 * Finds element by <code>key</code> in Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @return Pointer to data or NULL if there is no element with such key.
 */
Element * ch_get(unsigned int id, const char * key, unsigned int key_size);

/**
 * Finds element by <code>value</code> in Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param value Value
 * @param val_size Size of value
 * @return Pointer to data or NULL if there is no element with such value.
 */
Element * ch_find(unsigned int id, const char * value, unsigned int val_size);

/**
 * Removes element with <code>key</code> from Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @return 0 on success or -1 in case of error (does not exist).
 */
int ch_remove(unsigned int id, const char * key, unsigned int key_size);

/**
 * Set element by <code>key</code> in Hash Table with specified descriptor <code>id</code>
 * to new <code>value</code>.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @param value Value
 * @param val_size Size of value
 * @return Pointer to data or NULL if there is no element with such key.
 */
Element * ch_set(unsigned int id, const char * key, unsigned int key_size, const char * value, unsigned int val_size);

/**
 * Evaluates hash-code of specified <code>key</code>.
 * @param key Pointer to key
 * @param key_size Size of key in bytes
 * @return hash-code of <code>key</code>
 */
unsigned int ch_hash(const char * key, unsigned int key_size);

/**
 * This function returns the Iterator of the first element of hash-table
 * with specified descriptor <code>id</code>.
 * @param id Descriptor of hash_table
 * @return Iterator to the first element of hash table or NULL in case of error
 */
Iterator * ch_iterator(unsigned int id);

//#include "chain_hash.c"

#endif

chain_hash.c

/**
 * Hash Table with chains (implementation file)
 * Serves elements in raw binary arrays. Provides basic routines such, as: get, set, remove and find.
 * The idea comes from the book of D. Knut.
 * "Theory and practice of programming. Sorting and Searching."
 * All memory is allocated dynamicaly. See chain_hash_test.c for manual.
 * @file chain_hash.c
 * @author Anton Danshin, 915 group, (c) MIPT - 2011
 * @date 20 Feb 2011, 16:00 MSK
 */

#include "chain_hash.h"
#ifndef CHAIN_HASH_C

#define CHAIN_HASH_C

/**
 * Key-Value structure
 */
typedef struct _Element {
  char * value;
  char * key;
  unsigned int key_size;
  unsigned int val_size;
} Element;

/**
 * Double-Linked list for iterations between Elements of hash table
 */
typedef struct _Iterator {
  struct _Iterator * next, * prev;
  Element * data;
} Iterator;

/**
 * Single-Linked List. It is used for processing collisions.
 */
typedef struct _Chain{
  // Points to the next element with the same hash code or NULL
  struct _Chain * next;
  // Points to the element in the list of iterators
  Iterator * iterator;
  // Data: key, value
  Element data;
} Chain;

/**
 * Hash Table structure
 */
typedef struct _tagChainHashTable{
  // Number of elements
  unsigned int count;
  // Table capacity
  unsigned int capacity;
  // Hash table array (each element is a pointer to chain)
  Chain ** elements;
  // Points to the first element of the table
  Iterator * iterator;
} ChainHashTable;

/**
 * Static array containing pointers to Hash Tables currently created
 */
static ChainHashTable ** _ch_array = NULL;

/**
 * Number of Hash Tables created by now.
 * Contains -1 if Hash Tables have never been created,
 * or non-negative integer value - number of tables created.
 */
static int _ch_count = -1;

/**
 * ID Counter
 */
static unsigned int _ch_id = 0;

/**
 * Creates new instance of Hash Table with specified <code>capacity</code>
 * or set it to default value (if <code>capacity==0</code>)
 * @param capacity 0 if you want it to be default <code>CHAIN_HASH_TABLE_CAPACITY</code>
 * @return Descriptor of new hash table instance or -1 in case of error
 */
int ch_create(unsigned int capacity){
  if(capacity==0) capacity = CHAIN_HASH_TABLE_CAPACITY;

  if(_ch_count<=0){ // There are no tables
    _ch_id = 0;
    _ch_array = (ChainHashTable **)malloc(sizeof(ChainHashTable *));

    if(_ch_array==NULL) // Cannot allocate memory for the table array.
      return -1;

    _ch_array[0] = (ChainHashTable *)malloc(sizeof(ChainHashTable));
    if(_ch_array[0]==NULL){ // Cannot allocate memory for the table struct.
      free(_ch_array);
      return -1;
    }

    _ch_array[0]->count = 0;
    _ch_array[0]->capacity = capacity;
    _ch_array[0]->elements = (Chain **)malloc(sizeof(Chain *)*capacity);

    if(_ch_array[0]->elements==NULL){ // Cannot allocate memory for the elements.
      free(_ch_array);
      return -1;
    };

    unsigned int i;
    for(i=0;i<_ch_array[0]->capacity;i++)
      _ch_array[0]->elements[i] = NULL;
    _ch_array[0]->iterator = NULL;
    _ch_count = 1;
  } else {
    _ch_id++;

    _ch_array = (ChainHashTable **)realloc(_ch_array, sizeof(ChainHashTable *)*(_ch_id+1));
    if(_ch_array==NULL) // Cannot allocate memory for the table struct.
      return -1;
    _ch_array[_ch_id] = (ChainHashTable *)malloc(sizeof(ChainHashTable));

    if(_ch_array[_ch_id] == NULL){
      _ch_id--;
      _ch_array = (ChainHashTable **)realloc(_ch_array, sizeof(ChainHashTable *)*(_ch_id+1));
      return -1;
    }

    _ch_array[_ch_id]->count = 0;
    _ch_array[_ch_id]->capacity = capacity;
    _ch_array[_ch_id]->elements = (Chain **)malloc(sizeof(Chain *)*capacity);

    if(_ch_array[_ch_id]->elements==NULL){ // Cannot allocate memory for the elements.
      free(_ch_array[_ch_id]);
      return -1;
    }

    unsigned int i;
    for(i=0;i<_ch_array[_ch_id]->capacity;i++)
      _ch_array[_ch_id]->elements[i] = NULL;
    _ch_array[_ch_id]->iterator = NULL;
    _ch_count++;
  }

  return _ch_id; // Success! Return Table Descriptor.
}

/**
 * Removes Hash Table with specified descriptor <code>id</code>
 * and frees all allocated memory
 * @param id Hash Table descriptor
 * @return 0 on success or -1 in case of error
 */
int ch_free(const unsigned int id){
  if(id>_ch_id){ // Invalid Hash Table Descriptor
    return -1;
  }
  if(_ch_array[id]==NULL){ // Invalid Hash Table Descriptor
   return -1;
  }

  while(_ch_array[id]->iterator != NULL)
    ch_remove(id, _ch_array[id]->iterator->data->key, _ch_array[id]->iterator->data->key_size);

  free(_ch_array[id]->elements);
  free(_ch_array[id]);

  _ch_array[id]=NULL;
  _ch_count--;
  // There are no tables left => we can reset ID counter
  if(_ch_count==0){
      free(_ch_array);
      _ch_array = NULL;
      _ch_id = 0;
  }
  return 0;
}

/**
 * Finds element by <code>key</code> in Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @return Pointer to data or NULL if there is no element with such key.
 */
Element * ch_get(unsigned int id, const char * key, unsigned int key_size){
  if(id>_ch_id) // Invalid Descriptor
    return NULL;

  if(_ch_array[id]==NULL) // Table was removed!
    return NULL;
  // Compute hash-code
  unsigned int h = ch_hash(key, key_size) % _ch_array[id]->capacity;

  if(_ch_array[id]->elements[h] == NULL) // No such element
    return NULL;

  //The element might exist => find it
  Chain * chain = _ch_array[id]->elements[h];
  while(chain!=NULL){
    if(chain->data.key_size==key_size && memcmp(key, chain->data.key, key_size)==0) break;
    else chain = chain->next;
  }

  if(chain==NULL) // Not found
    return NULL;
  else return &(chain->data); // Success!
}

/**
 * Finds element by <code>value</code> in Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param value Value
 * @param val_size Size of value
 * @return Pointer to data or NULL if there is no element with such value.
 */
Element * ch_find(unsigned int id, const char * value, unsigned int val_size){
  perror("chain_hash::ch_find: Sorry, this feature is not implemented, yet.");
  if(id<_ch_id) return NULL;
  if(_ch_array[id]==NULL) return NULL;
  return NULL;
}

/**
 * Removes element with <code>key</code> from Hash Table with specified descriptor <code>id</code>
 * or finds out that it does not exist.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @return 0 on success or -1 in case of error (does not exist).
 */
int ch_remove(unsigned int id, const char * key, unsigned int key_size){
  if(id>_ch_id) // Invalid Descriptor
    return -1;
  if(_ch_array[id]==NULL) // Table was removed!
    return -1;

  unsigned int h = ch_hash(key, key_size) % _ch_array[id]->capacity;
  if(_ch_array[id]->elements[h]==NULL) //
    return -1;

  Chain * chain = _ch_array[id]->elements[h];
  Chain * prev = NULL;

  while(chain!=NULL){
    if(chain->data.key_size==key_size && memcmp(key, chain->data.key, key_size)==0) break;
    prev = chain;
    chain = chain->next;
  }

  if(chain==NULL) return -1;
  // Removing element
  if(prev==NULL){
    // chain points to the first
    _ch_array[id]->elements[h] = chain->next;
  } else {
    prev->next = chain->next;
  }
  if(chain->data.key!=NULL)
    free(chain->data.key);
  if(chain->data.value!=NULL)
    free(chain->data.value);
  if(chain->iterator!=NULL){
    if(chain->iterator->prev==NULL){
      _ch_array[id]->iterator = chain->iterator->next;
      if(_ch_array[id]->iterator!=NULL)
		  _ch_array[id]->iterator-> prev = NULL;
    } else
      chain->iterator->prev->next = chain->iterator->next;
    free(chain->iterator);
  }
  free(chain);
  return 0;
}

/**
 * Sets element by <code>key</code> in Hash Table with specified descriptor <code>id</code>
 * to new <code>value</code>.
 * @param id Descriptor of Hash Table
 * @param key Key
 * @param key_size Size of key
 * @param value Value
 * @param val_size Size of value
 * @return Pointer to data or NULL if there is no element with such key.
 */
Element * ch_set(unsigned int id, const char * key, unsigned int key_size,
	      const char * value, unsigned int val_size){
  int flag_new = 0;
  if(id>_ch_id) // Invalid Descriptor
    return NULL;
  if(_ch_array[id]==NULL) // Table was removed!
    return NULL;

  // Compute hash-code
  unsigned int h = ch_hash(key, key_size) % _ch_array[id]->capacity;

  Chain * chain;
  if(_ch_array[id]->elements[h] == NULL){ // Add new element
    chain = (Chain *)malloc(sizeof(Chain));
    if(chain == NULL) // Failed to allocate memory
      return NULL;

    chain->next = NULL;

    chain->data.value = (char *)malloc(val_size);
    memcpy(chain->data.value, value, val_size);
    chain->data.val_size = val_size;

    chain->data.key = (char *)malloc(key_size);
    memcpy(chain->data.key, key, key_size);
    chain->data.key_size = key_size;

    _ch_array[id]->elements[h] = chain;
	flag_new = 1;
  } else { // Collision!
    chain = _ch_array[id]->elements[h];

    while(chain->next!=NULL){
      if(key_size==chain->data.key_size && memcmp(key, chain->data.key, key_size)==0)
	break;
      else chain = chain->next;
    }

    if(key_size==chain->data.key_size && memcmp(key, chain->data.key, key_size)==0){
      // The key exists
      chain->data.value = (char *)realloc(chain->data.value, val_size);
      memcpy(chain->data.value, value, val_size);
    } else { // New key
      flag_new = 1;
      chain->next = (Chain *)malloc(sizeof(Chain));
      chain = chain->next;

      if(chain==NULL) // Cannot allocate memory
	return NULL;

      chain->next = NULL;

      chain->data.value = (char *)malloc(val_size);
      memcpy(chain->data.value, value, val_size);
      chain->data.val_size = val_size;

      chain->data.key = (char *)malloc(key_size);
      memcpy(chain->data.key, key, key_size);
      chain->data.key_size = key_size;
    }
  }

  if(flag_new){
    Iterator * iter = (Iterator *)malloc(sizeof(Iterator));
    iter->next = _ch_array[id]->iterator;
    if(_ch_array[id]->iterator!=NULL) _ch_array[id]->iterator->prev = iter;
    iter->data = &(chain->data);
    iter->prev = NULL;
    chain->iterator = iter;
    _ch_array[id]->iterator = iter;
  }
  return &(chain->data);
}

/**
 * Computes a hash-code of specified <code>key</code>.
 * Uses algorithm Adler-32.
 * @param key Pointer to key
 * @param key_size Size of key in bytes
 * @return hash-code of <code>key</code>
 * @link http://en.wikipedia.org/wiki/Adler-32 "Adler-32 (Eng)"
 */
unsigned int ch_hash(const char * key, unsigned int key_size){
  short s1 = 1;
  short s2 = 0;
  unsigned int n;
  for (n=0; n<key_size; n++){
    s1 = (s1 + key[n]) % 65521;
    s2 = (s2 + s1)     % 65521;
  }
  return (s2 << 16) + s1;
}

/**
 * This function returns the Iterator of the first element of hash-table
 * with specified descriptor <code>id</code>.
 * @param id Descriptor of hash_table
 * @return Iterator to the first element of hash table or NULL in case of error
 */
Iterator * ch_iterator(unsigned int id){
	if(id>_ch_id) // No such descriptor
		return NULL;
	if(_ch_array[id]==NULL) // Table no longer exists
		return NULL;
	return _ch_array[id]->iterator; // OK!
}

#endif

Showcase example

/**
 * This example shows how to use 'chain_hash.h' library.
 * @file chain_hash_test.c
 * @author Anton Danshin, 953 group. (c) MIPT - 2011
 * @date 20 Feb 2011, 22:00 MSK
 */

#include <stdlib.h>
#include <stdio.h>

#include "chain_hash.h"

int main(){
  // Create one instance of ChainHashTable with default capacity
  int i;
  for(i=0;i<10000;i++){
  int id = ch_create(0);
  printf("Table created with ID: %i\n", id);
  printf("ch_set: key=hello, value=привет\n");
  ch_set(id, "hello", 5, "привет", 6);
  printf("ch_set: key=hello, value=no\n");
  ch_set(id, "hello", 5, "no", 2);
  int id2 = ch_create(0);
  printf("Table created with ID2: %i\n", id2);
  ch_set(id2, "hello", 5, "черт!!!", strlen("черт!!!"));
  puts(ch_get(id2, "hello", 5)->value);
  ch_remove(id, "hello", 5);
  ch_remove(id2, "none", 4);
  if(ch_remove(id, "hello", 5)<0) puts("no!");
  ch_free(id);
  ch_free(id2);

  id = ch_create(0);
  printf("ID: %i\n", id);
  ch_set(id, "hello", 5, "привет", 6);
  ch_set(id, "hello", 5, "no", 2);
  id2 = ch_create(0);
  printf("ID2: %i\n", id2);
  ch_set(id2, "hello", 5, "черт!!!", strlen("черт!!!"));
  puts(ch_get(id2, "hello", 5)->value);
  ch_remove(id, "hello", 5);
  ch_remove(id2, "none", 4);
  if(ch_remove(id, "hello", 5)<0) puts("no!");
  ch_free(id);
  ch_free(id2);
  }
  return 0;
}

I tested it, and it worked just fine! Valgrind did not detected any errors or memory leaks and performance is comparable to C++ STL hash map. Hope this code is helpful for someone.

English: hash table illustration, with three k...

Image via Wikipedia

Anton Danshin

Advertisements

2 responses to “Hash Table: C implementation

  1. Pingback: [Unix C] ID implementation « JetCracker

  2. Pingback: Spring 2012: Overall stats « JetCracker

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: