aboutsummaryrefslogtreecommitdiffstats
path: root/libparc/parc/algol/parc_HashCodeTable.c
diff options
context:
space:
mode:
Diffstat (limited to 'libparc/parc/algol/parc_HashCodeTable.c')
-rwxr-xr-xlibparc/parc/algol/parc_HashCodeTable.c338
1 files changed, 338 insertions, 0 deletions
diff --git a/libparc/parc/algol/parc_HashCodeTable.c b/libparc/parc/algol/parc_HashCodeTable.c
new file mode 100755
index 00000000..ea1945b3
--- /dev/null
+++ b/libparc/parc/algol/parc_HashCodeTable.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implements an open-addressing hash table. We use linear probing of +1 per step.
+ *
+ * Table is rehashed when we reach 75% utilization.
+ * The table is rehashed if we go more than 10 linear probes without being able to insert.
+ *
+ * HashCodeTable is a wrapper that holds the key/data management functions. It also
+ * has LinearAddressingHashTable that is the actual hash table.
+ *
+ * This open-addressing table is inefficient for GET or DEL if the element does not exist.
+ * The whole table needs to be
+ *
+ */
+#include <config.h>
+
+#include <LongBow/runtime.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#include <parc/algol/parc_HashCodeTable.h>
+#include <parc/algol/parc_Memory.h>
+
+// minimum size if nothing specified
+#define MIN_SIZE 256
+
+// when we expand, use this factor
+#define EXPAND_FACTOR 2
+
+#define MAX_PROBE_LENGTH 20
+
+typedef enum {
+ ADD_OK, // we added the key
+ ADD_DUP, // the key is a duplicate
+ ADD_NOSPACE // ran out of space
+} PARCHashCodeTable_AddResult;
+
+typedef struct hashtable_entry {
+ // A hashtable entry is in use if the key is non-null
+ void *key;
+ void *data;
+ HashCodeType hashcode;
+} HashTableEntry;
+
+typedef struct linear_address_hash_table {
+ HashTableEntry *entries;
+
+ // Number of elements allocated
+ size_t tableLimit;
+
+ // Number of elements in use
+ size_t tableSize;
+
+ // When the tableSize equals or exceeds this
+ // threshold, we should expand and re-hash the tableāˆ«
+ size_t expandThreshold;
+} LinearAddressingHashTable;
+
+struct parc_hashcode_table {
+ LinearAddressingHashTable hashtable;
+
+ PARCHashCodeTable_KeyEqualsFunc keyEqualsFunc;
+ PARCHashCodeTable_HashCodeFunc keyHashCodeFunc;
+ PARCHashCodeTable_Destroyer keyDestroyer;
+ PARCHashCodeTable_Destroyer dataDestroyer;
+
+ unsigned expandCount;
+};
+
+static bool
+_findIndex(PARCHashCodeTable *table, const void *key, size_t *outputIndexPtr)
+{
+ size_t index, start;
+ HashCodeType hashcode;
+ LinearAddressingHashTable *innerTable;
+
+ innerTable = &table->hashtable;
+ hashcode = table->keyHashCodeFunc(key);
+ index = hashcode % innerTable->tableLimit;
+ start = index;
+
+
+ // check until we've gone MAX_PROBE_LENGTH
+ unsigned steps = 0;
+ do {
+ if (innerTable->entries[index].key != NULL) {
+ if ((innerTable->entries[index].hashcode == hashcode) && table->keyEqualsFunc(key, innerTable->entries[index].key)) {
+ // the key already exists in the table
+ *outputIndexPtr = index;
+ return true;
+ }
+ }
+ steps++;
+ index = index + 1;
+ if (index == innerTable->tableLimit) {
+ index = 0;
+ }
+ } while (index != start && steps < MAX_PROBE_LENGTH);
+
+ return false;
+}
+
+static PARCHashCodeTable_AddResult
+_innerTableAdd(LinearAddressingHashTable *innerTable, PARCHashCodeTable_KeyEqualsFunc keyEqualsFunc,
+ HashCodeType hashcode, void *key, void *data)
+{
+ size_t index = hashcode % innerTable->tableLimit;
+
+ unsigned steps = 0;
+
+ // we know the size < limit, so it will fit eventually
+ while (steps < MAX_PROBE_LENGTH) {
+ if (innerTable->entries[index].key == NULL) {
+ innerTable->entries[index].hashcode = hashcode;
+ innerTable->entries[index].key = key;
+ innerTable->entries[index].data = data;
+ innerTable->tableSize++;
+ return ADD_OK;
+ }
+
+ if ((innerTable->entries[index].hashcode == hashcode) && keyEqualsFunc(key, innerTable->entries[index].key)) {
+ // the key already exists in the table
+ return ADD_DUP;
+ }
+
+ steps++;
+ index = index + 1;
+ if (index == innerTable->tableLimit) {
+ index = 0;
+ }
+ }
+
+ return ADD_NOSPACE;
+}
+
+static PARCHashCodeTable_AddResult
+_rehash(LinearAddressingHashTable *old_table, LinearAddressingHashTable *new_table, PARCHashCodeTable_KeyEqualsFunc keyEqualsFunc)
+{
+ size_t i;
+ for (i = 0; i < old_table->tableLimit; i++) {
+ if (old_table->entries[i].key != NULL) {
+ PARCHashCodeTable_AddResult result = _innerTableAdd(new_table, keyEqualsFunc, old_table->entries[i].hashcode,
+ old_table->entries[i].key, old_table->entries[i].data);
+ if (result != ADD_OK) {
+ return result;
+ }
+ }
+ }
+ return ADD_OK;
+}
+
+static void
+_expand(PARCHashCodeTable *hashCodeTable)
+{
+ LinearAddressingHashTable temp_table;
+ LinearAddressingHashTable *old_table = &hashCodeTable->hashtable;
+
+ size_t expandby = EXPAND_FACTOR;
+
+ // start with a copy of the current table
+ PARCHashCodeTable_AddResult result = ADD_OK;
+ do {
+ hashCodeTable->expandCount++;
+
+ temp_table.tableSize = 0;
+ temp_table.tableLimit = old_table->tableLimit * expandby;
+ temp_table.expandThreshold = temp_table.tableLimit - temp_table.tableLimit / 4;
+ temp_table.entries = parcMemory_AllocateAndClear(temp_table.tableLimit * sizeof(HashTableEntry));
+ assertNotNull(temp_table.entries, "parcMemory_AllocateAndClear(%zu) returned NULL", temp_table.tableLimit * sizeof(HashTableEntry));
+
+ result = _rehash(old_table, &temp_table, hashCodeTable->keyEqualsFunc);
+ if (result == ADD_NOSPACE) {
+ // could not rehash, so expand by more and try again
+ parcMemory_Deallocate((void **) &(temp_table.entries));
+ expandby++;
+ }
+ } while (result == ADD_NOSPACE);
+
+ parcMemory_Deallocate((void **) &old_table->entries);
+ hashCodeTable->hashtable = temp_table;
+}
+
+PARCHashCodeTable *
+parcHashCodeTable_Create_Size(PARCHashCodeTable_KeyEqualsFunc keyEqualsFunc,
+ PARCHashCodeTable_HashCodeFunc keyHashCodeFunc,
+ PARCHashCodeTable_Destroyer keyDestroyer,
+ PARCHashCodeTable_Destroyer dataDestroyer,
+ size_t minimumSize)
+{
+ PARCHashCodeTable *table = parcMemory_AllocateAndClear(sizeof(PARCHashCodeTable));
+ assertNotNull(table, "parcMemory_AllocateAndClear(%zu) returned NULL", sizeof(PARCHashCodeTable));
+
+ assertNotNull(keyEqualsFunc, "keyEqualsFunc must be non-null");
+ assertNotNull(keyHashCodeFunc, "keyHashCodeFunc must be non-null");
+ assertTrue(minimumSize > 0, "minimumSize must be greater than zero");
+
+ table->keyEqualsFunc = keyEqualsFunc;
+ table->keyHashCodeFunc = keyHashCodeFunc;
+ table->keyDestroyer = keyDestroyer;
+ table->dataDestroyer = dataDestroyer;
+
+ table->hashtable.entries = parcMemory_AllocateAndClear(minimumSize * sizeof(HashTableEntry));
+ assertNotNull(table->hashtable.entries, "parcMemory_AllocateAndClear(%zu) returned NULL", minimumSize * sizeof(HashTableEntry));
+ table->hashtable.tableLimit = minimumSize;
+ table->hashtable.tableSize = 0;
+
+ memset(table->hashtable.entries, 0, minimumSize * sizeof(HashTableEntry));
+
+ // expand at 75% utilization
+ table->hashtable.expandThreshold = minimumSize - minimumSize / 4;
+
+ return table;
+}
+
+PARCHashCodeTable *
+parcHashCodeTable_Create(PARCHashCodeTable_KeyEqualsFunc keyEqualsFunc,
+ PARCHashCodeTable_HashCodeFunc keyHashCodeFunc,
+ PARCHashCodeTable_Destroyer keyDestroyer,
+ PARCHashCodeTable_Destroyer dataDestroyer)
+{
+ return parcHashCodeTable_Create_Size(keyEqualsFunc, keyHashCodeFunc, keyDestroyer, dataDestroyer, MIN_SIZE);
+}
+
+void
+parcHashCodeTable_Destroy(PARCHashCodeTable **tablePtr)
+{
+ assertNotNull(tablePtr, "Parameter must be non-null double pointer");
+ assertNotNull(*tablePtr, "Parameter must dereference to non-null pointer");
+ PARCHashCodeTable *table = *tablePtr;
+ size_t i;
+
+ for (i = 0; i < table->hashtable.tableLimit; i++) {
+ if (table->hashtable.entries[i].key != NULL) {
+ if (table->keyDestroyer) {
+ table->keyDestroyer(&table->hashtable.entries[i].key);
+ }
+
+ if (table->dataDestroyer) {
+ table->dataDestroyer(&table->hashtable.entries[i].data);
+ }
+ }
+ }
+
+ parcMemory_Deallocate((void **) &(table->hashtable.entries));
+ parcMemory_Deallocate((void **) &table);
+ *tablePtr = NULL;
+}
+
+bool
+parcHashCodeTable_Add(PARCHashCodeTable *table, void *key, void *data)
+{
+ assertNotNull(table, "Parameter table must be non-null");
+ assertNotNull(key, "Parameter key must be non-null");
+ assertNotNull(data, "Parameter data must be non-null");
+
+ if (table->hashtable.tableSize >= table->hashtable.expandThreshold) {
+ _expand(table);
+ }
+
+ HashCodeType hashcode = table->keyHashCodeFunc(key);
+
+ PARCHashCodeTable_AddResult result = ADD_OK;
+ do {
+ result = _innerTableAdd(&table->hashtable, table->keyEqualsFunc, hashcode, key, data);
+ if (result == ADD_NOSPACE) {
+ _expand(table);
+ }
+ } while (result == ADD_NOSPACE);
+
+ return (result == ADD_OK);
+}
+
+void
+parcHashCodeTable_Del(PARCHashCodeTable *table, const void *key)
+{
+ size_t index;
+ bool found;
+
+ assertNotNull(table, "Parameter table must be non-null");
+ assertNotNull(key, "parameter key must be non-null");
+
+ found = _findIndex(table, key, &index);
+
+ if (found) {
+ assertTrue(table->hashtable.tableSize > 0, "Illegal state: found entry in a hash table with 0 size");
+
+ if (table->keyDestroyer) {
+ table->keyDestroyer(&table->hashtable.entries[index].key);
+ }
+
+ if (table->dataDestroyer) {
+ table->dataDestroyer(&table->hashtable.entries[index].data);
+ }
+
+ memset(&table->hashtable.entries[index], 0, sizeof(HashTableEntry));
+
+ table->hashtable.tableSize--;
+ }
+}
+
+void *
+parcHashCodeTable_Get(PARCHashCodeTable *table, const void *key)
+{
+ size_t index;
+
+ assertNotNull(table, "Parameter table must be non-null");
+ assertNotNull(key, "parameter key must be non-null");
+
+ bool found = _findIndex(table, key, &index);
+
+ if (found) {
+ return table->hashtable.entries[index].data;
+ }
+
+ return NULL;
+}
+
+size_t
+parcHashCodeTable_Length(const PARCHashCodeTable *table)
+{
+ assertNotNull(table, "Parameter table must be non-null");
+ return table->hashtable.tableSize;
+}