From fd7c2153a60caa53f970fc7e17cb015ebb73d1a3 Mon Sep 17 00:00:00 2001 From: Relintai Date: Sun, 15 Jan 2023 22:57:13 +0100 Subject: [PATCH] Replaced the HashMap's implementation with the one Godot4. Refactored it to work as a drop in replacement. Renamed the old one to OGHashMap. --- core/containers/hash_map.h | 961 +++++++++++++++-------------- core/containers/og_hash_map.h | 604 ++++++++++++++++++ core/containers/ordered_hash_map.h | 4 +- 3 files changed, 1117 insertions(+), 452 deletions(-) create mode 100644 core/containers/og_hash_map.h diff --git a/core/containers/hash_map.h b/core/containers/hash_map.h index 2e694c71d..98bcd2d1f 100644 --- a/core/containers/hash_map.h +++ b/core/containers/hash_map.h @@ -1,447 +1,309 @@ #ifndef HASH_MAP_H #define HASH_MAP_H -/*************************************************************************/ -/* hash_map.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "core/error/error_macros.h" +/**************************************************************************/ +/* hash_map.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + #include "core/containers/hashfuncs.h" -#include "core/containers/list.h" +#include "core/containers/paged_allocator.h" +#include "core/containers/pair.h" #include "core/math/math_funcs.h" #include "core/os/memory.h" -#include "core/string/ustring.h" /** - * @class HashMap - * @author Juan Linietsky + * A HashMap implementation that uses open addressing with Robin Hood hashing. + * Robin Hood hashing swaps out entries that have a smaller probing distance + * than the to-be-inserted entry, that evens out the average probing distance + * and enables faster lookups. Backward shift deletion is employed to further + * improve the performance and to avoid infinite loops in rare cases. * - * Implementation of a standard Hashing HashMap, for quick lookups of Data associated with a Key. - * The implementation provides hashers for the default types, if you need a special kind of hasher, provide - * your own. - * @param TKey Key, search is based on it, needs to be hasheable. It is unique in this container. - * @param TData Data, data associated with the key - * @param Hasher Hasher object, needs to provide a valid static hash function for TKey - * @param Comparator comparator object, needs to be able to safely compare two TKey values. It needs to ensure that x == x for any items inserted in the map. Bear in mind that nan != nan when implementing an equality check. - * @param MIN_HASH_TABLE_POWER Miminum size of the hash table, as a power of two. You rarely need to change this parameter. - * @param RELATIONSHIP Relationship at which the hash table is resized. 
if amount of elements is RELATIONSHIP - * times bigger than the hash table, table is resized to solve this condition. if RELATIONSHIP is zero, table is always MIN_HASH_TABLE_POWER. + * Keys and values are stored in a doubly linked list by insertion order. This + * has a slight performance overhead on lookup, which can be mostly compensated + * using a paged allocator if required. * + * The assignment operator copies the pairs from one map to the other. */ -template , uint8_t MIN_HASH_TABLE_POWER = 3, uint8_t RELATIONSHIP = 8> +template > class HashMap { public: - struct Pair { - TKey key; - TData data; - - Pair(const TKey &p_key) : - key(p_key), - data() {} - Pair(const TKey &p_key, const TData &p_data) : - key(p_key), - data(p_data) { - } - }; - - struct Element { - private: - friend class HashMap; - - uint32_t hash; - Element *next; - Element() { next = nullptr; } - Pair pair; - - public: - const TKey &key() const { - return pair.key; - } - - TData &value() { - return pair.data; - } - - const TData &value() const { - return pair.value(); - } - - Element(const TKey &p_key) : - pair(p_key) {} - Element(const Element &p_other) : - hash(p_other.hash), - pair(p_other.pair.key, p_other.pair.data) {} - }; - -private: - Element **hash_table; - uint8_t hash_table_power; - uint32_t elements; - - void make_hash_table() { - ERR_FAIL_COND(hash_table); - - hash_table = memnew_arr(Element *, (1 << MIN_HASH_TABLE_POWER)); - - hash_table_power = MIN_HASH_TABLE_POWER; - elements = 0; - for (int i = 0; i < (1 << MIN_HASH_TABLE_POWER); i++) { - hash_table[i] = nullptr; - } - } - - void erase_hash_table() { - ERR_FAIL_COND_MSG(elements, "Cannot erase hash table if there are still elements inside."); - - memdelete_arr(hash_table); - hash_table = nullptr; - hash_table_power = 0; - elements = 0; - } - - void check_hash_table() { - int new_hash_table_power = -1; - - if ((int)elements > ((1 << hash_table_power) * RELATIONSHIP)) { - /* rehash up */ - new_hash_table_power = hash_table_power +
1; - - while ((int)elements > ((1 << new_hash_table_power) * RELATIONSHIP)) { - new_hash_table_power++; - } - - } else if ((hash_table_power > (int)MIN_HASH_TABLE_POWER) && ((int)elements < ((1 << (hash_table_power - 1)) * RELATIONSHIP))) { - /* rehash down */ - new_hash_table_power = hash_table_power - 1; - - while ((int)elements < ((1 << (new_hash_table_power - 1)) * RELATIONSHIP)) { - new_hash_table_power--; - } - - if (new_hash_table_power < (int)MIN_HASH_TABLE_POWER) { - new_hash_table_power = MIN_HASH_TABLE_POWER; - } - } - - if (new_hash_table_power == -1) { - return; - } - - Element **new_hash_table = memnew_arr(Element *, ((uint64_t)1 << new_hash_table_power)); - ERR_FAIL_COND_MSG(!new_hash_table, "Out of memory."); - - for (int i = 0; i < (1 << new_hash_table_power); i++) { - new_hash_table[i] = nullptr; - } - - if (hash_table) { - for (int i = 0; i < (1 << hash_table_power); i++) { - while (hash_table[i]) { - Element *se = hash_table[i]; - hash_table[i] = se->next; - int new_pos = se->hash & ((1 << new_hash_table_power) - 1); - se->next = new_hash_table[new_pos]; - new_hash_table[new_pos] = se; - } - } - - memdelete_arr(hash_table); - } - hash_table = new_hash_table; - hash_table_power = new_hash_table_power; - } - - /* I want to have only one function.. 
*/ - _FORCE_INLINE_ const Element *get_element(const TKey &p_key) const { - uint32_t hash = Hasher::hash(p_key); - uint32_t index = hash & ((1 << hash_table_power) - 1); - - Element *e = hash_table[index]; - - while (e) { - /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && Comparator::compare(e->pair.key, p_key)) { - /* the pair exists in this hashtable, so just update data */ - return e; - } - - e = e->next; - } - - return nullptr; - } - - Element *create_element(const TKey &p_key) { - /* if element doesn't exist, create it */ - Element *e = memnew(Element(p_key)); - ERR_FAIL_COND_V_MSG(!e, nullptr, "Out of memory."); - uint32_t hash = Hasher::hash(p_key); - uint32_t index = hash & ((1 << hash_table_power) - 1); - e->next = hash_table[index]; - e->hash = hash; - - hash_table[index] = e; - elements++; - - return e; - } - - void copy_from(const HashMap &p_t) { - if (&p_t == this) { - return; /* much less bother with that */ - } - - clear(); - - if (!p_t.hash_table || p_t.hash_table_power == 0) { - return; /* not copying from empty table */ - } - - hash_table = memnew_arr(Element *, (uint64_t)1 << p_t.hash_table_power); - hash_table_power = p_t.hash_table_power; - elements = p_t.elements; - - for (int i = 0; i < (1 << p_t.hash_table_power); i++) { - hash_table[i] = nullptr; - - const Element *e = p_t.hash_table[i]; - - while (e) { - Element *le = memnew(Element(*e)); /* local element */ - - /* add to list and reassign pointers */ - le->next = hash_table[i]; - hash_table[i] = le; - - e = e->next; - } - } - } + const uint32_t MIN_CAPACITY_INDEX = 2; // Use a prime. 
+ const float MAX_OCCUPANCY = 0.75; + const uint32_t EMPTY_HASH = 0; public: - Element *set(const TKey &p_key, const TData &p_data) { - return set(Pair(p_key, p_data)); + struct Element { + Element *next = nullptr; + Element *prev = nullptr; + KeyValue data; + Element() {} + Element(const TKey &p_key, const TValue &p_value) : + data(p_key, p_value) {} + }; + +public: + _FORCE_INLINE_ uint32_t get_capacity() const { return hash_table_size_primes[capacity_index]; } + _FORCE_INLINE_ uint32_t size() const { return num_elements; } + + /* Standard Godot Container API */ + + bool empty() const { + return num_elements == 0; } - Element *set(const Pair &p_pair) { - Element *e = nullptr; - if (!hash_table) { - make_hash_table(); // if no table, make one - } else { - e = const_cast(get_element(p_pair.key)); + void clear() { + if (elements == nullptr) { + return; } - - /* if we made it up to here, the pair doesn't exist, create and assign */ - - if (!e) { - e = create_element(p_pair.key); - if (!e) { - return nullptr; + uint32_t capacity = hash_table_size_primes[capacity_index]; + for (uint32_t i = 0; i < capacity; i++) { + if (hashes[i] == EMPTY_HASH) { + continue; } - check_hash_table(); // perform mantenience routine + + hashes[i] = EMPTY_HASH; + memdelete(elements[i]); + elements[i] = nullptr; } - e->pair.data = p_pair.data; - return e; + tail_element = nullptr; + head_element = nullptr; + num_elements = 0; } - bool has(const TKey &p_key) const { - return getptr(p_key) != nullptr; + TValue &get(const TKey &p_key) { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + CRASH_COND_MSG(!exists, "HashMap key not found."); + return elements[pos]->data.value; } - /** - * Get a key from data, return a const reference. 
- * WARNING: this doesn't check errors, use either getptr and check NULL, or check - * first with has(key) - */ - - const TData &get(const TKey &p_key) const { - const TData *res = getptr(p_key); - CRASH_COND_MSG(!res, "Map key not found."); - return *res; + const TValue &get(const TKey &p_key) const { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + CRASH_COND_MSG(!exists, "HashMap key not found."); + return elements[pos]->data.value; } - TData &get(const TKey &p_key) { - TData *res = getptr(p_key); - CRASH_COND_MSG(!res, "Map key not found."); - return *res; - } + const TValue *getptr(const TKey &p_key) const { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); - /** - * Same as get, except it can return NULL when item was not found. - * This is mainly used for speed purposes. - */ - - _FORCE_INLINE_ TData *getptr(const TKey &p_key) { - if (unlikely(!hash_table)) { - return nullptr; + if (exists) { + return &elements[pos]->data.value; } - - Element *e = const_cast(get_element(p_key)); - - if (e) { - return &e->pair.data; - } - return nullptr; } - _FORCE_INLINE_ const TData *getptr(const TKey &p_key) const { - if (unlikely(!hash_table)) { - return nullptr; + TValue *getptr(const TKey &p_key) { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + + if (exists) { + return &elements[pos]->data.value; } - - const Element *e = const_cast(get_element(p_key)); - - if (e) { - return &e->pair.data; - } - return nullptr; } + const Element *get_element(const TKey &p_key) const { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + + if (exists) { + return elements[pos]; + } + + return NULL; + } + + Element *get_element(const TKey &p_key) { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + + if (exists) { + return elements[pos]; + } + + return NULL; + } + /** * Same as get, except it can return NULL when item was not found. 
* This version is custom, will take a hash and a custom key (that should support operator==() */ template - _FORCE_INLINE_ TData *custom_getptr(C p_custom_key, uint32_t p_custom_hash) { - if (unlikely(!hash_table)) { - return nullptr; - } - - uint32_t hash = p_custom_hash; - uint32_t index = hash & ((1 << hash_table_power) - 1); - - Element *e = hash_table[index]; - - while (e) { - /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && Comparator::compare(e->pair.key, p_custom_key)) { - /* the pair exists in this hashtable, so just update data */ - return &e->pair.data; - } - - e = e->next; - } - - return nullptr; - } - - template - _FORCE_INLINE_ const TData *custom_getptr(C p_custom_key, uint32_t p_custom_hash) const { - if (unlikely(!hash_table)) { + _FORCE_INLINE_ TValue *custom_getptr(C p_custom_key, uint32_t p_custom_hash) { + if (unlikely(!elements)) { return NULL; } + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; uint32_t hash = p_custom_hash; - uint32_t index = hash & ((1 << hash_table_power) - 1); + uint32_t pos = fastmod(hash, capacity_inv, capacity); + uint32_t distance = 0; - const Element *e = hash_table[index]; - - while (e) { - /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && Comparator::compare(e->pair.key, p_custom_key)) { - /* the pair exists in this hashtable, so just update data */ - return &e->pair.data; + while (true) { + if (hashes[pos] == EMPTY_HASH) { + return NULL; } - e = e->next; + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { + return NULL; + } + + if (hashes[pos] == hash && Comparator::compare(elements[pos]->data.key, p_custom_key)) { + return &elements[pos]->data.value; + } + + pos = fastmod((pos + 1), capacity_inv, capacity); + distance++; } return NULL; } - /** - * Erase an item, return true if erasing was successful - */ 
+ template + _FORCE_INLINE_ const TValue *custom_getptr(C p_custom_key, uint32_t p_custom_hash) const { + if (unlikely(!elements)) { + return NULL; + } + + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t hash = p_custom_hash; + uint32_t pos = fastmod(hash, capacity_inv, capacity); + uint32_t distance = 0; + + while (true) { + if (hashes[pos] == EMPTY_HASH) { + return NULL; + } + + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { + return NULL; + } + + if (hashes[pos] == hash && Comparator::compare(elements[pos]->data.key, p_custom_key)) { + return &elements[pos]->data.value; + } + + pos = fastmod((pos + 1), capacity_inv, capacity); + distance++; + } + + return NULL; + } + + _FORCE_INLINE_ bool has(const TKey &p_key) const { + uint32_t _pos = 0; + return _lookup_pos(p_key, _pos); + } bool erase(const TKey &p_key) { - if (unlikely(!hash_table)) { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + + if (!exists) { return false; } - uint32_t hash = Hasher::hash(p_key); - uint32_t index = hash & ((1 << hash_table_power) - 1); - - Element *e = hash_table[index]; - Element *p = nullptr; - while (e) { - /* checking hash first avoids comparing key, which may take longer */ - if (e->hash == hash && Comparator::compare(e->pair.key, p_key)) { - if (p) { - p->next = e->next; - } else { - //begin of list - hash_table[index] = e->next; - } - - memdelete(e); - elements--; - - if (elements == 0) { - erase_hash_table(); - } else { - check_hash_table(); - } - return true; - } - - p = e; - e = e->next; + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t next_pos = fastmod((pos + 1), capacity_inv, capacity); + while (hashes[next_pos] != EMPTY_HASH && _get_probe_length(next_pos, hashes[next_pos], capacity, capacity_inv) != 0) { + 
SWAP(hashes[next_pos], hashes[pos]); + SWAP(elements[next_pos], elements[pos]); + pos = next_pos; + next_pos = fastmod((pos + 1), capacity_inv, capacity); } - return false; + hashes[pos] = EMPTY_HASH; + + if (head_element == elements[pos]) { + head_element = elements[pos]->next; + } + + if (tail_element == elements[pos]) { + tail_element = elements[pos]->prev; + } + + if (elements[pos]->prev) { + elements[pos]->prev->next = elements[pos]->next; + } + + if (elements[pos]->next) { + elements[pos]->next->prev = elements[pos]->prev; + } + + memdelete(elements[pos]); + elements[pos] = nullptr; + + num_elements--; + return true; } - inline const TData &operator[](const TKey &p_key) const { //constref + // Reserves space for a number of elements, useful to avoid many resizes and rehashes. + // If adding a known (possibly large) number of elements at once, must be larger than old capacity. + void reserve(uint32_t p_new_capacity) { + uint32_t new_index = capacity_index; - return get(p_key); + while (hash_table_size_primes[new_index] < p_new_capacity) { + ERR_FAIL_COND_MSG(new_index + 1 == (uint32_t)HASH_TABLE_SIZE_MAX, nullptr); + new_index++; + } + + if (new_index == capacity_index) { + return; + } + + if (elements == nullptr) { + capacity_index = new_index; + return; // Unallocated yet. 
+ } + _resize_and_rehash(new_index); } - inline TData &operator[](const TKey &p_key) { //assignment - Element *e = nullptr; - if (!hash_table) { - make_hash_table(); // if no table, make one - } else { - e = const_cast(get_element(p_key)); - } + _FORCE_INLINE_ Element *front() { + return head_element; + } + _FORCE_INLINE_ Element *back() { + return tail_element; + } - /* if we made it up to here, the pair doesn't exist, create */ - if (!e) { - e = create_element(p_key); - CRASH_COND(!e); - check_hash_table(); // perform mantenience routine - } - - return e->pair.data; + _FORCE_INLINE_ const Element *front() const { + return head_element; + } + _FORCE_INLINE_ const Element *back() const { + return tail_element; } /** @@ -458,36 +320,33 @@ public: * print( *k ); * } * + * This is for backwards compatibility. Use this syntax instead for new code: + * + * for (const HashMap::Element *E = map.front(); E; E = E->next) { + * ... + * } + * */ const TKey *next(const TKey *p_key) const { - if (unlikely(!hash_table)) { + if (unlikely(!elements)) { return nullptr; } if (!p_key) { /* get the first key */ - for (int i = 0; i < (1 << hash_table_power); i++) { - if (hash_table[i]) { - return &hash_table[i]->pair.key; - } + if (unlikely(!front())) { + return nullptr; } + return &front()->data.key; + } else { /* get the next key */ const Element *e = get_element(*p_key); ERR_FAIL_COND_V_MSG(!e, nullptr, "Invalid key supplied."); if (e->next) { /* if there is a "next" in the list, return that */ - return &e->next->pair.key; - } else { - /* go to next elements */ - uint32_t index = e->hash & ((1 << hash_table_power) - 1); - index++; - for (int i = index; i < (1 << hash_table_power); i++) { - if (hash_table[i]) { - return &hash_table[i]->pair.key; - } - } + return &e->next->data.key; } /* nothing found, was at end */ @@ -496,81 +355,283 @@ public: return nullptr; /* nothing found */ } - inline unsigned int size() const { - return elements; + /* Indexing */ + + const TValue 
&operator[](const TKey &p_key) const { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + CRASH_COND(!exists); + return elements[pos]->data.value; } - inline bool empty() const { - return elements == 0; - } - - void clear() { - /* clean up */ - if (hash_table) { - for (int i = 0; i < (1 << hash_table_power); i++) { - while (hash_table[i]) { - Element *e = hash_table[i]; - hash_table[i] = e->next; - memdelete(e); - } - } - - memdelete_arr(hash_table); - } - - hash_table = nullptr; - hash_table_power = 0; - elements = 0; - } - - void operator=(const HashMap &p_table) { - copy_from(p_table); - } - - HashMap() { - hash_table = nullptr; - elements = 0; - hash_table_power = 0; - } - - void get_key_value_ptr_array(const Pair **p_pairs) const { - if (unlikely(!hash_table)) { - return; - } - for (int i = 0; i < (1 << hash_table_power); i++) { - Element *e = hash_table[i]; - while (e) { - *p_pairs = &e->pair; - p_pairs++; - e = e->next; - } + TValue &operator[](const TKey &p_key) { + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + if (!exists) { + return _insert(p_key, TValue())->data.value; + } else { + return elements[pos]->data.value; } } + /* Insert */ + + Element *insert(const TKey &p_key, const TValue &p_value, bool p_front_insert = false) { + return _insert(p_key, p_value, p_front_insert); + } + + Element *set(const TKey &p_key, const TValue &p_value, bool p_front_insert = false) { + return _insert(p_key, p_value, p_front_insert); + } + + /* Helpers */ + void get_key_list(List *p_keys) const { - if (unlikely(!hash_table)) { + if (unlikely(!elements)) { return; } - for (int i = 0; i < (1 << hash_table_power); i++) { - Element *e = hash_table[i]; - while (e) { - p_keys->push_back(e->pair.key); - e = e->next; - } + + for (const Element *E = front(); E; E = E->next) { + p_keys->push_back(E->data.key); } } - HashMap(const HashMap &p_table) { - hash_table = nullptr; - elements = 0; - hash_table_power = 0; + /* Constructors */ - copy_from(p_table); + 
HashMap(const HashMap &p_other) { + reserve(hash_table_size_primes[p_other.capacity_index]); + + if (p_other.num_elements == 0) { + return; + } + + for (const Element *E = p_other.front(); E; E = E->next) { + insert(E->data.key, E->data.value); + } + } + + void operator=(const HashMap &p_other) { + if (this == &p_other) { + return; // Ignore self assignment. + } + if (num_elements != 0) { + clear(); + } + + reserve(hash_table_size_primes[p_other.capacity_index]); + + if (p_other.elements == nullptr) { + return; // Nothing to copy. + } + + for (const Element *E = p_other.front(); E; E = E->next) { + insert(E->data.key, E->data.value); + } + } + + HashMap(uint32_t p_initial_capacity) { + // Capacity can't be 0. + capacity_index = 0; + reserve(p_initial_capacity); + } + HashMap() { + capacity_index = MIN_CAPACITY_INDEX; + } + + uint32_t debug_get_hash(uint32_t p_index) { + if (num_elements == 0) { + return 0; + } + ERR_FAIL_INDEX_V(p_index, get_capacity(), 0); + return hashes[p_index]; + } + Element *debug_get_element(uint32_t p_index) { + if (num_elements == 0) { + return NULL; + } + + ERR_FAIL_INDEX_V(p_index, get_capacity(), NULL); + + return elements[p_index]; } ~HashMap() { clear(); + + if (elements != nullptr) { + Memory::free_static(elements); + Memory::free_static(hashes); + } + } + +private: + Element **elements = nullptr; + uint32_t *hashes = nullptr; + Element *head_element = nullptr; + Element *tail_element = nullptr; + + uint32_t capacity_index = 0; + uint32_t num_elements = 0; + + _FORCE_INLINE_ uint32_t _hash(const TKey &p_key) const { + uint32_t hash = Hasher::hash(p_key); + + if (unlikely(hash == EMPTY_HASH)) { + hash = EMPTY_HASH + 1; + } + + return hash; + } + + static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) { + const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity); + return fastmod(p_pos - original_pos + p_capacity, 
p_capacity_inv, p_capacity); + } + + bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const { + if (elements == nullptr) { + return false; // Failed lookups, no elements + } + + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t hash = _hash(p_key); + uint32_t pos = fastmod(hash, capacity_inv, capacity); + uint32_t distance = 0; + + while (true) { + if (hashes[pos] == EMPTY_HASH) { + return false; + } + + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { + return false; + } + + if (hashes[pos] == hash && Comparator::compare(elements[pos]->data.key, p_key)) { + r_pos = pos; + return true; + } + + pos = fastmod((pos + 1), capacity_inv, capacity); + distance++; + } + } + + void _insert_with_hash(uint32_t p_hash, Element *p_value) { + const uint32_t capacity = hash_table_size_primes[capacity_index]; + const uint64_t capacity_inv = hash_table_size_primes_inv[capacity_index]; + uint32_t hash = p_hash; + Element *value = p_value; + uint32_t distance = 0; + uint32_t pos = fastmod(hash, capacity_inv, capacity); + + while (true) { + if (hashes[pos] == EMPTY_HASH) { + elements[pos] = value; + hashes[pos] = hash; + + num_elements++; + + return; + } + + // Not an empty slot, let's check the probing length of the existing one. + uint32_t existing_probe_len = _get_probe_length(pos, hashes[pos], capacity, capacity_inv); + if (existing_probe_len < distance) { + SWAP(hash, hashes[pos]); + SWAP(value, elements[pos]); + distance = existing_probe_len; + } + + pos = fastmod((pos + 1), capacity_inv, capacity); + distance++; + } + } + + void _resize_and_rehash(uint32_t p_new_capacity_index) { + uint32_t old_capacity = hash_table_size_primes[capacity_index]; + + // Capacity can't be 0. 
+ capacity_index = MAX((uint32_t)MIN_CAPACITY_INDEX, p_new_capacity_index); + + uint32_t capacity = hash_table_size_primes[capacity_index]; + + Element **old_elements = elements; + uint32_t *old_hashes = hashes; + + num_elements = 0; + hashes = reinterpret_cast(Memory::alloc_static(sizeof(uint32_t) * capacity)); + elements = reinterpret_cast(Memory::alloc_static(sizeof(Element *) * capacity)); + + for (uint32_t i = 0; i < capacity; i++) { + hashes[i] = 0; + elements[i] = nullptr; + } + + if (old_capacity == 0) { + // Nothing to do. + return; + } + + for (uint32_t i = 0; i < old_capacity; i++) { + if (old_hashes[i] == EMPTY_HASH) { + continue; + } + + _insert_with_hash(old_hashes[i], old_elements[i]); + } + + Memory::free_static(old_elements); + Memory::free_static(old_hashes); + } + + _FORCE_INLINE_ Element *_insert(const TKey &p_key, const TValue &p_value, bool p_front_insert = false) { + uint32_t capacity = hash_table_size_primes[capacity_index]; + if (unlikely(elements == nullptr)) { + // Allocate on demand to save memory. 
+ + hashes = reinterpret_cast(Memory::alloc_static(sizeof(uint32_t) * capacity)); + elements = reinterpret_cast(Memory::alloc_static(sizeof(Element *) * capacity)); + + for (uint32_t i = 0; i < capacity; i++) { + hashes[i] = EMPTY_HASH; + elements[i] = nullptr; + } + } + + uint32_t pos = 0; + bool exists = _lookup_pos(p_key, pos); + + if (exists) { + elements[pos]->data.value = p_value; + return elements[pos]; + } else { + if (num_elements + 1 > MAX_OCCUPANCY * capacity) { + ERR_FAIL_COND_V_MSG(capacity_index + 1 == HASH_TABLE_SIZE_MAX, nullptr, "Hash table maximum capacity reached, aborting insertion."); + _resize_and_rehash(capacity_index + 1); + } + + Element *elem = memnew(Element(p_key, p_value)); + + if (tail_element == nullptr) { + head_element = elem; + tail_element = elem; + } else if (p_front_insert) { + head_element->prev = elem; + elem->next = head_element; + head_element = elem; + } else { + tail_element->next = elem; + elem->prev = tail_element; + tail_element = elem; + } + + uint32_t hash = _hash(p_key); + _insert_with_hash(hash, elem); + return elem; + } } }; -#endif +#endif // HASH_MAP_H diff --git a/core/containers/og_hash_map.h b/core/containers/og_hash_map.h new file mode 100644 index 000000000..c495ba774 --- /dev/null +++ b/core/containers/og_hash_map.h @@ -0,0 +1,604 @@ +#ifndef GHASH_MAP_H +#define GHASH_MAP_H + +/*************************************************************************/ +/* hash_map.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "core/containers/hashfuncs.h" +#include "core/containers/list.h" +#include "core/error/error_macros.h" +#include "core/math/math_funcs.h" +#include "core/os/memory.h" +#include "core/string/ustring.h" + +/** + * @class OGHashMap + * @author Juan Linietsky + * + * Implementation of a standard Hashing HashMap, for quick lookups of Data associated with a Key. + * The implementation provides hashers for the default types, if you need a special kind of hasher, provide + * your own. + * @param TKey Key, search is based on it, needs to be hasheable. It is unique in this container. 
+ * @param TData Data, data associated with the key + * @param Hasher Hasher object, needs to provide a valid static hash function for TKey + * @param Comparator comparator object, needs to be able to safely compare two TKey values. It needs to ensure that x == x for any items inserted in the map. Bear in mind that nan != nan when implementing an equality check. + * @param MIN_HASH_TABLE_POWER Miminum size of the hash table, as a power of two. You rarely need to change this parameter. + * @param RELATIONSHIP Relationship at which the hash table is resized. if amount of elements is RELATIONSHIP + * times bigger than the hash table, table is resized to solve this condition. if RELATIONSHIP is zero, table is always MIN_HASH_TABLE_POWER. + * + */ + +template , uint8_t MIN_HASH_TABLE_POWER = 3, uint8_t RELATIONSHIP = 8> +class OGHashMap { +public: + struct Pair { + TKey key; + TData data; + + Pair(const TKey &p_key) : + key(p_key), + data() {} + Pair(const TKey &p_key, const TData &p_data) : + key(p_key), + data(p_data) { + } + }; + + struct Element { + private: + friend class OGHashMap; + + uint32_t hash; + Element *next; + Element() { next = nullptr; } + Pair pair; + + public: + const TKey &key() const { + return pair.key; + } + + TData &value() { + return pair.data; + } + + const TData &value() const { + return pair.data; + } + + TData &get() { + return pair.data; + }; + const TData &get() const { + return pair.data; + }; + + Element(const TKey &p_key) : + pair(p_key) {} + Element(const Element &p_other) : + hash(p_other.hash), + pair(p_other.pair.key, p_other.pair.data) {} + }; + +private: + Element **hash_table; + uint8_t hash_table_power; + uint32_t elements; + + void make_hash_table() { + ERR_FAIL_COND(hash_table); + + hash_table = memnew_arr(Element *, (1 << MIN_HASH_TABLE_POWER)); + + hash_table_power = MIN_HASH_TABLE_POWER; + elements = 0; + for (int i = 0; i < (1 << MIN_HASH_TABLE_POWER); i++) { + hash_table[i] = nullptr; + } + } + + void erase_hash_table() { 
+ ERR_FAIL_COND_MSG(elements, "Cannot erase hash table if there are still elements inside."); + + memdelete_arr(hash_table); + hash_table = nullptr; + hash_table_power = 0; + elements = 0; + } + + void check_hash_table() { + int new_hash_table_power = -1; + + if ((int)elements > ((1 << hash_table_power) * RELATIONSHIP)) { + /* rehash up */ + new_hash_table_power = hash_table_power + 1; + + while ((int)elements > ((1 << new_hash_table_power) * RELATIONSHIP)) { + new_hash_table_power++; + } + + } else if ((hash_table_power > (int)MIN_HASH_TABLE_POWER) && ((int)elements < ((1 << (hash_table_power - 1)) * RELATIONSHIP))) { + /* rehash down */ + new_hash_table_power = hash_table_power - 1; + + while ((int)elements < ((1 << (new_hash_table_power - 1)) * RELATIONSHIP)) { + new_hash_table_power--; + } + + if (new_hash_table_power < (int)MIN_HASH_TABLE_POWER) { + new_hash_table_power = MIN_HASH_TABLE_POWER; + } + } + + if (new_hash_table_power == -1) { + return; + } + + Element **new_hash_table = memnew_arr(Element *, ((uint64_t)1 << new_hash_table_power)); + ERR_FAIL_COND_MSG(!new_hash_table, "Out of memory."); + + for (int i = 0; i < (1 << new_hash_table_power); i++) { + new_hash_table[i] = nullptr; + } + + if (hash_table) { + for (int i = 0; i < (1 << hash_table_power); i++) { + while (hash_table[i]) { + Element *se = hash_table[i]; + hash_table[i] = se->next; + int new_pos = se->hash & ((1 << new_hash_table_power) - 1); + se->next = new_hash_table[new_pos]; + new_hash_table[new_pos] = se; + } + } + + memdelete_arr(hash_table); + } + hash_table = new_hash_table; + hash_table_power = new_hash_table_power; + } + + /* I want to have only one function.. 
*/ + _FORCE_INLINE_ const Element *get_element(const TKey &p_key) const { + uint32_t hash = Hasher::hash(p_key); + uint32_t index = hash & ((1 << hash_table_power) - 1); + + Element *e = hash_table[index]; + + while (e) { + /* checking hash first avoids comparing key, which may take longer */ + if (e->hash == hash && Comparator::compare(e->pair.key, p_key)) { + /* the pair exists in this hashtable, so just update data */ + return e; + } + + e = e->next; + } + + return nullptr; + } + + Element *create_element(const TKey &p_key) { + /* if element doesn't exist, create it */ + Element *e = memnew(Element(p_key)); + ERR_FAIL_COND_V_MSG(!e, nullptr, "Out of memory."); + uint32_t hash = Hasher::hash(p_key); + uint32_t index = hash & ((1 << hash_table_power) - 1); + e->next = hash_table[index]; + e->hash = hash; + + hash_table[index] = e; + elements++; + + return e; + } + + void copy_from(const OGHashMap &p_t) { + if (&p_t == this) { + return; /* much less bother with that */ + } + + clear(); + + if (!p_t.hash_table || p_t.hash_table_power == 0) { + return; /* not copying from empty table */ + } + + hash_table = memnew_arr(Element *, (uint64_t)1 << p_t.hash_table_power); + hash_table_power = p_t.hash_table_power; + elements = p_t.elements; + + for (int i = 0; i < (1 << p_t.hash_table_power); i++) { + hash_table[i] = nullptr; + + const Element *e = p_t.hash_table[i]; + + while (e) { + Element *le = memnew(Element(*e)); /* local element */ + + /* add to list and reassign pointers */ + le->next = hash_table[i]; + hash_table[i] = le; + + e = e->next; + } + } + } + +public: + Element *set(const TKey &p_key, const TData &p_data) { + return set(Pair(p_key, p_data)); + } + + Element *set(const Pair &p_pair) { + Element *e = nullptr; + if (!hash_table) { + make_hash_table(); // if no table, make one + } else { + e = const_cast(get_element(p_pair.key)); + } + + /* if we made it up to here, the pair doesn't exist, create and assign */ + + if (!e) { + e = create_element(p_pair.key); 
+ if (!e) { + return nullptr; + } + check_hash_table(); // perform mantenience routine + } + + e->pair.data = p_pair.data; + return e; + } + + bool has(const TKey &p_key) const { + return getptr(p_key) != nullptr; + } + + /** + * Get a key from data, return a const reference. + * WARNING: this doesn't check errors, use either getptr and check NULL, or check + * first with has(key) + */ + + const TData &get(const TKey &p_key) const { + const TData *res = getptr(p_key); + CRASH_COND_MSG(!res, "Map key not found."); + return *res; + } + + TData &get(const TKey &p_key) { + TData *res = getptr(p_key); + CRASH_COND_MSG(!res, "Map key not found."); + return *res; + } + + /** + * Same as get, except it can return NULL when item was not found. + * This is mainly used for speed purposes. + */ + + _FORCE_INLINE_ TData *getptr(const TKey &p_key) { + if (unlikely(!hash_table)) { + return nullptr; + } + + Element *e = const_cast(get_element(p_key)); + + if (e) { + return &e->pair.data; + } + + return nullptr; + } + + _FORCE_INLINE_ const TData *getptr(const TKey &p_key) const { + if (unlikely(!hash_table)) { + return nullptr; + } + + const Element *e = const_cast(get_element(p_key)); + + if (e) { + return &e->pair.data; + } + + return nullptr; + } + + const Element *find(const TKey &p_key) const { + if (unlikely(!hash_table)) { + return nullptr; + } + + const Element *e = const_cast(get_element(p_key)); + + return e; + } + + Element *find(const TKey &p_key) { + if (unlikely(!hash_table)) { + return nullptr; + } + + Element *e = const_cast(get_element(p_key)); + + return e; + } + + /** + * Same as get, except it can return NULL when item was not found. 
+ * This version is custom, will take a hash and a custom key (that should support operator==() + */ + + template + _FORCE_INLINE_ TData *custom_getptr(C p_custom_key, uint32_t p_custom_hash) { + if (unlikely(!hash_table)) { + return nullptr; + } + + uint32_t hash = p_custom_hash; + uint32_t index = hash & ((1 << hash_table_power) - 1); + + Element *e = hash_table[index]; + + while (e) { + /* checking hash first avoids comparing key, which may take longer */ + if (e->hash == hash && Comparator::compare(e->pair.key, p_custom_key)) { + /* the pair exists in this hashtable, so just update data */ + return &e->pair.data; + } + + e = e->next; + } + + return nullptr; + } + + template + _FORCE_INLINE_ const TData *custom_getptr(C p_custom_key, uint32_t p_custom_hash) const { + if (unlikely(!hash_table)) { + return NULL; + } + + uint32_t hash = p_custom_hash; + uint32_t index = hash & ((1 << hash_table_power) - 1); + + const Element *e = hash_table[index]; + + while (e) { + /* checking hash first avoids comparing key, which may take longer */ + if (e->hash == hash && Comparator::compare(e->pair.key, p_custom_key)) { + /* the pair exists in this hashtable, so just update data */ + return &e->pair.data; + } + + e = e->next; + } + + return NULL; + } + + /** + * Erase an item, return true if erasing was successful + */ + + bool erase(const TKey &p_key) { + if (unlikely(!hash_table)) { + return false; + } + + uint32_t hash = Hasher::hash(p_key); + uint32_t index = hash & ((1 << hash_table_power) - 1); + + Element *e = hash_table[index]; + Element *p = nullptr; + while (e) { + /* checking hash first avoids comparing key, which may take longer */ + if (e->hash == hash && Comparator::compare(e->pair.key, p_key)) { + if (p) { + p->next = e->next; + } else { + //begin of list + hash_table[index] = e->next; + } + + memdelete(e); + elements--; + + if (elements == 0) { + erase_hash_table(); + } else { + check_hash_table(); + } + return true; + } + + p = e; + e = e->next; + } + + return 
false; + } + + inline const TData &operator[](const TKey &p_key) const { //constref + + return get(p_key); + } + inline TData &operator[](const TKey &p_key) { //assignment + + Element *e = nullptr; + if (!hash_table) { + make_hash_table(); // if no table, make one + } else { + e = const_cast(get_element(p_key)); + } + + /* if we made it up to here, the pair doesn't exist, create */ + if (!e) { + e = create_element(p_key); + CRASH_COND(!e); + check_hash_table(); // perform mantenience routine + } + + return e->pair.data; + } + + /** + * Get the next key to p_key, and the first key if p_key is null. + * Returns a pointer to the next key if found, NULL otherwise. + * Adding/Removing elements while iterating will, of course, have unexpected results, don't do it. + * + * Example: + * + * const TKey *k=NULL; + * + * while( (k=table.next(k)) ) { + * + * print( *k ); + * } + * + */ + const TKey *next(const TKey *p_key) const { + if (unlikely(!hash_table)) { + return nullptr; + } + + if (!p_key) { /* get the first key */ + + for (int i = 0; i < (1 << hash_table_power); i++) { + if (hash_table[i]) { + return &hash_table[i]->pair.key; + } + } + + } else { /* get the next key */ + + const Element *e = get_element(*p_key); + ERR_FAIL_COND_V_MSG(!e, nullptr, "Invalid key supplied."); + if (e->next) { + /* if there is a "next" in the list, return that */ + return &e->next->pair.key; + } else { + /* go to next elements */ + uint32_t index = e->hash & ((1 << hash_table_power) - 1); + index++; + for (int i = index; i < (1 << hash_table_power); i++) { + if (hash_table[i]) { + return &hash_table[i]->pair.key; + } + } + } + + /* nothing found, was at end */ + } + + return nullptr; /* nothing found */ + } + + inline unsigned int size() const { + return elements; + } + + inline bool empty() const { + return elements == 0; + } + + void clear() { + /* clean up */ + if (hash_table) { + for (int i = 0; i < (1 << hash_table_power); i++) { + while (hash_table[i]) { + Element *e = 
hash_table[i]; + hash_table[i] = e->next; + memdelete(e); + } + } + + memdelete_arr(hash_table); + } + + hash_table = nullptr; + hash_table_power = 0; + elements = 0; + } + + void operator=(const OGHashMap &p_table) { + copy_from(p_table); + } + + OGHashMap() { + hash_table = nullptr; + elements = 0; + hash_table_power = 0; + } + + void get_key_value_ptr_array(const Pair **p_pairs) const { + if (unlikely(!hash_table)) { + return; + } + for (int i = 0; i < (1 << hash_table_power); i++) { + Element *e = hash_table[i]; + while (e) { + *p_pairs = &e->pair; + p_pairs++; + e = e->next; + } + } + } + + void get_key_list(List *p_keys) const { + if (unlikely(!hash_table)) { + return; + } + for (int i = 0; i < (1 << hash_table_power); i++) { + Element *e = hash_table[i]; + while (e) { + p_keys->push_back(e->pair.key); + e = e->next; + } + } + } + + OGHashMap(const OGHashMap &p_table) { + hash_table = nullptr; + elements = 0; + hash_table_power = 0; + + copy_from(p_table); + } + + ~OGHashMap() { + clear(); + } +}; + +#endif diff --git a/core/containers/ordered_hash_map.h b/core/containers/ordered_hash_map.h index 8bccfda9b..fb958b820 100644 --- a/core/containers/ordered_hash_map.h +++ b/core/containers/ordered_hash_map.h @@ -30,7 +30,7 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ -#include "core/containers/hash_map.h" +#include "core/containers/og_hash_map.h" #include "core/containers/list.h" #include "core/containers/pair.h" @@ -45,7 +45,7 @@ template , uint8_t MIN_HASH_TABLE_POWER = 3, uint8_t RELATIONSHIP = 8> class OrderedHashMap { typedef List> InternalList; - typedef HashMap InternalMap; + typedef OGHashMap InternalMap; InternalList list; InternalMap map;