From 09138d762c523dfa4c532eb82ac3bbd18b0e32eb Mon Sep 17 00:00:00 2001 From: Relintai Date: Sat, 18 Mar 2023 09:44:01 +0100 Subject: [PATCH] Ported: Use hash table for GDScript parsing GDScript now uses hash table for lookup of type lists / functions / keywords, instead of linear String comparisons. - lawnjelly https://github.com/godotengine/godot/commit/19f2006ec073f42d1b2dc480ac4103ac24b0b59a --- modules/cscript/cscript.cpp | 8 +- modules/cscript/cscript_tokenizer.cpp | 158 +++++++++++++++--------- modules/cscript/cscript_tokenizer.h | 17 ++- modules/gdscript/gdscript.cpp | 4 + modules/gdscript/gdscript_tokenizer.cpp | 158 +++++++++++++++--------- modules/gdscript/gdscript_tokenizer.h | 17 ++- 6 files changed, 236 insertions(+), 126 deletions(-) diff --git a/modules/cscript/cscript.cpp b/modules/cscript/cscript.cpp index bcd05f0db..ce31bc95a 100644 --- a/modules/cscript/cscript.cpp +++ b/modules/cscript/cscript.cpp @@ -30,13 +30,13 @@ #include "cscript.h" -#include "core/core_string_names.h" #include "core/config/engine.h" +#include "core/config/project_settings.h" +#include "core/core_string_names.h" #include "core/global_constants.h" #include "core/io/file_access_encrypted.h" #include "core/os/file_access.h" #include "core/os/os.h" -#include "core/config/project_settings.h" #include "cscript_compiler.h" /////////////////////////// @@ -2026,6 +2026,8 @@ CScriptWarning::Code CScriptWarning::get_code_from_name(const String &p_name) { #endif // DEBUG_ENABLED CScriptLanguage::CScriptLanguage() { + CScriptTokenizer::initialize(); + calls = 0; ERR_FAIL_COND(singleton); singleton = this; @@ -2070,6 +2072,8 @@ CScriptLanguage::CScriptLanguage() { } CScriptLanguage::~CScriptLanguage() { + CScriptTokenizer::terminate(); + if (_call_stack) { memdelete_arr(_call_stack); } diff --git a/modules/cscript/cscript_tokenizer.cpp b/modules/cscript/cscript_tokenizer.cpp index 4647e5128..cfcbd6c99 100644 --- a/modules/cscript/cscript_tokenizer.cpp +++ b/modules/cscript/cscript_tokenizer.cpp @@ -30,11 +30,13 @@ #include "cscript_tokenizer.h" -#include "core/io/marshalls.h" #include "core/containers/rb_map.h" +#include "core/io/marshalls.h" #include "core/string/print_string.h" #include "cscript_functions.h" +OAHashMap *CScriptTokenizer::token_hashtable = NULL; + const char *CScriptTokenizer::token_names[TK_MAX] = { "Empty", "Identifier", @@ -228,6 +230,96 @@ static const _kws _keyword_list[] = { { CScriptTokenizer::TK_ERROR, nullptr } }; +// Prepare the hash table for parsing as a one off at startup. +void CScriptTokenizer::initialize() { + token_hashtable = memnew((OAHashMap)); + + token_hashtable->insert("null", 0); + token_hashtable->insert("true", 1); + token_hashtable->insert("false", 2); + + // _type_list + int id = TOKEN_HASH_TABLE_TYPE_START; + int idx = 0; + while (_type_list[idx].text) { + token_hashtable->insert(_type_list[idx].text, id++); + idx++; + } + + // built in funcs + id = TOKEN_HASH_TABLE_BUILTIN_START; + for (int j = 0; j < CScriptFunctions::FUNC_MAX; j++) { + token_hashtable->insert(CScriptFunctions::get_func_name(CScriptFunctions::Function(j)), id++); + } + + // keywords + id = TOKEN_HASH_TABLE_KEYWORD_START; + idx = 0; + while (_keyword_list[idx].text) { + token_hashtable->insert(_keyword_list[idx].text, id++); + idx++; + } +} + +void CScriptTokenizer::terminate() { + if (token_hashtable) { + memdelete(token_hashtable); + token_hashtable = nullptr; + } +} + +// return whether found +bool CScriptTokenizerText::_parse_identifier(const String &p_str) { + // N.B. CScriptTokenizer::initialize() must have been called before using this function, + // else token_hashtable will be NULL. + const int *found = token_hashtable->lookup_ptr(p_str); + + if (found) { + int id = *found; + if (id < TOKEN_HASH_TABLE_TYPE_START) { + switch (id) { + case 0: { + _make_constant(Variant()); + } break; + case 1: { + _make_constant(true); + } break; + case 2: { + _make_constant(false); + } break; + default: { + DEV_ASSERT(0); + } break; + } + return true; + } else { + // type list + if (id < TOKEN_HASH_TABLE_BUILTIN_START) { + int idx = id - TOKEN_HASH_TABLE_TYPE_START; + _make_type(_type_list[idx].type); + return true; + } + + // built in func + if (id < TOKEN_HASH_TABLE_KEYWORD_START) { + int idx = id - TOKEN_HASH_TABLE_BUILTIN_START; + _make_built_in_func(CScriptFunctions::Function(idx)); + return true; + } + + // keyword + int idx = id - TOKEN_HASH_TABLE_KEYWORD_START; + _make_token(_keyword_list[idx].token); + return true; + } + + return true; + } + + // not found + return false; +} + const char *CScriptTokenizer::get_token_name(Token p_token) { ERR_FAIL_INDEX_V(p_token, TK_MAX, ""); return token_names[p_token]; @@ -962,69 +1054,15 @@ void CScriptTokenizerText::_advance() { i++; } - bool identifier = false; + // Detect preset keywords / functions using hashtable. + bool found = _parse_identifier(str); - if (str == "null") { - _make_constant(Variant()); - - } else if (str == "true") { - _make_constant(true); - - } else if (str == "false") { - _make_constant(false); - } else { - bool found = false; - - { - int idx = 0; - - while (_type_list[idx].text) { - if (str == _type_list[idx].text) { - _make_type(_type_list[idx].type); - found = true; - break; - } - idx++; - } - } - - if (!found) { - //built in func? - - for (int j = 0; j < CScriptFunctions::FUNC_MAX; j++) { - if (str == CScriptFunctions::get_func_name(CScriptFunctions::Function(j))) { - _make_built_in_func(CScriptFunctions::Function(j)); - found = true; - break; - } - } - } - - if (!found) { - //keyword - - int idx = 0; - found = false; - - while (_keyword_list[idx].text) { - if (str == _keyword_list[idx].text) { - _make_token(_keyword_list[idx].token); - found = true; - break; - } - idx++; - } - } - - if (!found) { - identifier = true; - } - } - - if (identifier) { + if (!found) { _make_identifier(str); } + INCPOS(str.length()); + return; } diff --git a/modules/cscript/cscript_tokenizer.h b/modules/cscript/cscript_tokenizer.h index 455295bef..603c707bd 100644 --- a/modules/cscript/cscript_tokenizer.h +++ b/modules/cscript/cscript_tokenizer.h @@ -30,11 +30,12 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ +#include "core/containers/oa_hash_map.h" #include "core/containers/pair.h" +#include "core/containers/vmap.h" #include "core/string/string_name.h" #include "core/string/ustring.h" #include "core/variant/variant.h" -#include "core/containers/vmap.h" #include "cscript_functions.h" class CScriptTokenizer { @@ -144,9 +145,20 @@ protected: static const char *token_names[TK_MAX]; + enum { + TOKEN_HASH_TABLE_TYPE_START = 3, + TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX, + TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + CScriptFunctions::FUNC_MAX, + }; + + static OAHashMap *token_hashtable; + public: static const char *get_token_name(Token p_token); + static void initialize(); + static void terminate(); + bool is_token_literal(int p_offset = 0, bool variable_safe = false) const; StringName get_token_literal(int p_offset = 0) const; @@ -167,7 +179,7 @@ public: virtual bool is_ignoring_warnings() const = 0; #endif // DEBUG_ENABLED - virtual ~CScriptTokenizer(){}; + virtual ~CScriptTokenizer() {} }; class CScriptTokenizerText : public CScriptTokenizer { @@ -220,6 +232,7 @@ class CScriptTokenizerText : public CScriptTokenizer { #endif // DEBUG_ENABLED void _advance(); + bool _parse_identifier(const String &p_str); public: void set_code(const String &p_code); diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index b5dc1a643..d9bc12132 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -2053,6 +2053,8 @@ GDScriptWarning::Code GDScriptWarning::get_code_from_name(const String &p_name) #endif // DEBUG_ENABLED GDScriptLanguage::GDScriptLanguage() { + GDScriptTokenizer::initialize(); + calls = 0; ERR_FAIL_COND(singleton); singleton = this; @@ -2097,6 +2099,8 @@ GDScriptLanguage::GDScriptLanguage() { } GDScriptLanguage::~GDScriptLanguage() { + GDScriptTokenizer::terminate(); + if (_call_stack) { memdelete_arr(_call_stack); } diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index cc6fe4639..82c18fade 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -30,11 +30,13 @@ #include "gdscript_tokenizer.h" -#include "core/io/marshalls.h" #include "core/containers/rb_map.h" +#include "core/io/marshalls.h" #include "core/string/print_string.h" #include "gdscript_functions.h" +OAHashMap *GDScriptTokenizer::token_hashtable = NULL; + const char *GDScriptTokenizer::token_names[TK_MAX] = { "Empty", "Identifier", @@ -230,6 +232,96 @@ static const _kws _keyword_list[] = { { GDScriptTokenizer::TK_ERROR, nullptr } }; +// Prepare the hash table for parsing as a one off at startup. +void GDScriptTokenizer::initialize() { + token_hashtable = memnew((OAHashMap)); + + token_hashtable->insert("null", 0); + token_hashtable->insert("true", 1); + token_hashtable->insert("false", 2); + + // _type_list + int id = TOKEN_HASH_TABLE_TYPE_START; + int idx = 0; + while (_type_list[idx].text) { + token_hashtable->insert(_type_list[idx].text, id++); + idx++; + } + + // built in funcs + id = TOKEN_HASH_TABLE_BUILTIN_START; + for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) { + token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j)), id++); + } + + // keywords + id = TOKEN_HASH_TABLE_KEYWORD_START; + idx = 0; + while (_keyword_list[idx].text) { + token_hashtable->insert(_keyword_list[idx].text, id++); + idx++; + } +} + +void GDScriptTokenizer::terminate() { + if (token_hashtable) { + memdelete(token_hashtable); + token_hashtable = nullptr; + } +} + +// return whether found +bool GDScriptTokenizerText::_parse_identifier(const String &p_str) { + // N.B. GDScriptTokenizer::initialize() must have been called before using this function, + // else token_hashtable will be NULL. + const int *found = token_hashtable->lookup_ptr(p_str); + + if (found) { + int id = *found; + if (id < TOKEN_HASH_TABLE_TYPE_START) { + switch (id) { + case 0: { + _make_constant(Variant()); + } break; + case 1: { + _make_constant(true); + } break; + case 2: { + _make_constant(false); + } break; + default: { + DEV_ASSERT(0); + } break; + } + return true; + } else { + // type list + if (id < TOKEN_HASH_TABLE_BUILTIN_START) { + int idx = id - TOKEN_HASH_TABLE_TYPE_START; + _make_type(_type_list[idx].type); + return true; + } + + // built in func + if (id < TOKEN_HASH_TABLE_KEYWORD_START) { + int idx = id - TOKEN_HASH_TABLE_BUILTIN_START; + _make_built_in_func(GDScriptFunctions::Function(idx)); + return true; + } + + // keyword + int idx = id - TOKEN_HASH_TABLE_KEYWORD_START; + _make_token(_keyword_list[idx].token); + return true; + } + + return true; + } + + // not found + return false; +} + const char *GDScriptTokenizer::get_token_name(Token p_token) { ERR_FAIL_INDEX_V(p_token, TK_MAX, ""); return token_names[p_token]; @@ -965,69 +1057,15 @@ void GDScriptTokenizerText::_advance() { i++; } - bool identifier = false; + // Detect preset keywords / functions using hashtable. + bool found = _parse_identifier(str); - if (str == "null") { - _make_constant(Variant()); - - } else if (str == "true") { - _make_constant(true); - - } else if (str == "false") { - _make_constant(false); - } else { - bool found = false; - - { - int idx = 0; - - while (_type_list[idx].text) { - if (str == _type_list[idx].text) { - _make_type(_type_list[idx].type); - found = true; - break; - } - idx++; - } - } - - if (!found) { - //built in func? - - for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) { - if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) { - _make_built_in_func(GDScriptFunctions::Function(j)); - found = true; - break; - } - } - } - - if (!found) { - //keyword - - int idx = 0; - found = false; - - while (_keyword_list[idx].text) { - if (str == _keyword_list[idx].text) { - _make_token(_keyword_list[idx].token); - found = true; - break; - } - idx++; - } - } - - if (!found) { - identifier = true; - } - } - - if (identifier) { + if (!found) { _make_identifier(str); } + INCPOS(str.length()); + return; } diff --git a/modules/gdscript/gdscript_tokenizer.h b/modules/gdscript/gdscript_tokenizer.h index 8b2a60410..a596281f1 100644 --- a/modules/gdscript/gdscript_tokenizer.h +++ b/modules/gdscript/gdscript_tokenizer.h @@ -30,11 +30,12 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ +#include "core/containers/oa_hash_map.h" #include "core/containers/pair.h" +#include "core/containers/vmap.h" #include "core/string/string_name.h" #include "core/string/ustring.h" #include "core/variant/variant.h" -#include "core/containers/vmap.h" #include "gdscript_functions.h" class GDScriptTokenizer { @@ -145,9 +146,20 @@ protected: static const char *token_names[TK_MAX]; + enum { + TOKEN_HASH_TABLE_TYPE_START = 3, + TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX, + TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + GDScriptFunctions::FUNC_MAX, + }; + + static OAHashMap *token_hashtable; + public: static const char *get_token_name(Token p_token); + static void initialize(); + static void terminate(); + bool is_token_literal(int p_offset = 0, bool variable_safe = false) const; StringName get_token_literal(int p_offset = 0) const; @@ -168,7 +180,7 @@ public: virtual bool is_ignoring_warnings() const = 0; #endif // DEBUG_ENABLED - virtual ~GDScriptTokenizer(){}; + virtual ~GDScriptTokenizer() {} }; class GDScriptTokenizerText : public GDScriptTokenizer { @@ -221,6 +233,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer { #endif // DEBUG_ENABLED void _advance(); + bool _parse_identifier(const String &p_str); public: void set_code(const String &p_code);