mirror of
https://github.com/Relintai/pandemonium_engine.git
synced 2025-01-12 13:51:10 +01:00
1478 lines
37 KiB
C++
1478 lines
37 KiB
C++
/*************************************************************************/
|
|
/* gdscript_tokenizer.cpp */
|
|
/*************************************************************************/
|
|
/* This file is part of: */
|
|
/* GODOT ENGINE */
|
|
/* https://godotengine.org */
|
|
/*************************************************************************/
|
|
/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
|
|
/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
|
|
/* */
|
|
/* Permission is hereby granted, free of charge, to any person obtaining */
|
|
/* a copy of this software and associated documentation files (the */
|
|
/* "Software"), to deal in the Software without restriction, including */
|
|
/* without limitation the rights to use, copy, modify, merge, publish, */
|
|
/* distribute, sublicense, and/or sell copies of the Software, and to */
|
|
/* permit persons to whom the Software is furnished to do so, subject to */
|
|
/* the following conditions: */
|
|
/* */
|
|
/* The above copyright notice and this permission notice shall be */
|
|
/* included in all copies or substantial portions of the Software. */
|
|
/* */
|
|
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
|
|
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
|
|
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
|
|
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
|
|
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
|
|
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
|
|
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
/*************************************************************************/
|
|
|
|
#include "cscript_tokenizer.h"
|
|
|
|
#include "core/io/marshalls.h"
|
|
#include "core/containers/map.h"
|
|
#include "core/string/print_string.h"
|
|
#include "cscript_functions.h"
|
|
|
|
// Display name of every token, indexed by the numeric Token value
// (get_token_name() returns token_names[p_token]).
const char *CScriptTokenizer::token_names[TK_MAX] = {
	// Basic tokens.
	"Empty", "Identifier", "Constant", "Self", "Built-In Type", "Built-In Func",

	// Comparison and membership.
	"In", "'=='", "'!='", "'<'", "'<='", "'>'", "'>='",

	// Boolean logic.
	"'and'", "'or'", "'not'",

	// Arithmetic and shifts.
	"'+'", "'-'", "'*'", "'/'", "'%'", "'<<'", "'>>'",

	// Assignments.
	"'='", "'+='", "'-='", "'*='", "'/='", "'%='", "'<<='", "'>>='", "'&='", "'|='", "'^='",

	// Bitwise operators.
	"'&'", "'|'", "'^'", "'~'",
	//"Plus Plus",
	//"Minus Minus",

	// Control flow.
	"if", "elif", "else", "for", "while", "break", "continue", "pass", "return", "match",

	// Declarations and other reserved words.
	"func", "class", "class_name", "extends", "is", "onready", "tool", "static", "export",
	"setget", "const", "var", "as", "void", "enum", "preload", "assert", "signal", "breakpoint",

	// Brackets and punctuation.
	"'['", "']'", "'{'", "'}'", "'('", "')'",
	"','", "';'", "'.'", "'?'", "':'", "'$'", "'->'", "'\\n'",

	// Named constants and the wildcard.
	"PI", "TAU", "_", "INF", "NAN",

	// Tokenizer state tokens.
	"Error", "EOF", "Cursor"
};
|
|
|
|
struct _bit {
|
|
Variant::Type type;
|
|
const char *text;
|
|
};
|
|
//built in types
|
|
|
|
// Table of built-in type names recognised by the tokenizer.
// It is scanned linearly and ends at the entry whose `text` is nullptr.
static const _bit _type_list[] = {
	// Scalars and strings.
	{ Variant::BOOL, "bool" },
	{ Variant::INT, "int" },
	{ Variant::REAL, "float" },
	{ Variant::STRING, "String" },

	// Math types.
	{ Variant::RECT2, "Rect2" },
	{ Variant::RECT2I, "Rect2i" },
	{ Variant::VECTOR2, "Vector2" },
	{ Variant::VECTOR2I, "Vector2i" },
	{ Variant::VECTOR3, "Vector3" },
	{ Variant::VECTOR3I, "Vector3i" },
	{ Variant::VECTOR4, "Vector4" },
	{ Variant::VECTOR4I, "Vector4i" },
	{ Variant::PLANE, "Plane" },
	{ Variant::QUATERNION, "Quaternion" },
	{ Variant::AABB, "AABB" },
	{ Variant::BASIS, "Basis" },
	{ Variant::TRANSFORM, "Transform" },
	{ Variant::TRANSFORM2D, "Transform2D" },
	{ Variant::PROJECTION, "Projection" },

	// Miscellaneous types.
	{ Variant::COLOR, "Color" },
	{ Variant::NODE_PATH, "NodePath" },
	{ Variant::RID, "RID" },
	{ Variant::OBJECT, "Object" },
	{ Variant::STRING_NAME, "StringName" },
	{ Variant::DICTIONARY, "Dictionary" },
	{ Variant::ARRAY, "Array" },

	// Pool arrays.
	{ Variant::POOL_BYTE_ARRAY, "PoolByteArray" },
	{ Variant::POOL_INT_ARRAY, "PoolIntArray" },
	{ Variant::POOL_REAL_ARRAY, "PoolRealArray" },
	{ Variant::POOL_STRING_ARRAY, "PoolStringArray" },
	{ Variant::POOL_VECTOR2_ARRAY, "PoolVector2Array" },
	{ Variant::POOL_VECTOR2I_ARRAY, "PoolVector2iArray" },
	{ Variant::POOL_VECTOR3_ARRAY, "PoolVector3Array" },
	{ Variant::POOL_VECTOR3I_ARRAY, "PoolVector3iArray" },
	{ Variant::POOL_VECTOR4_ARRAY, "PoolVector4Array" },
	{ Variant::POOL_VECTOR4I_ARRAY, "PoolVector4iArray" },
	{ Variant::POOL_COLOR_ARRAY, "PoolColorArray" },

	// Terminator.
	{ Variant::VARIANT_MAX, nullptr },
};
|
|
|
|
struct _kws {
|
|
CScriptTokenizer::Token token;
|
|
const char *text;
|
|
};
|
|
|
|
// Table mapping reserved words to their tokens.
// It is scanned linearly and ends at the entry whose `text` is nullptr.
static const _kws _keyword_list[] = {
	// Word operators.
	{ CScriptTokenizer::TK_OP_IN, "in" },
	{ CScriptTokenizer::TK_OP_NOT, "not" },
	{ CScriptTokenizer::TK_OP_OR, "or" },
	{ CScriptTokenizer::TK_OP_AND, "and" },

	// Declarations and other reserved words.
	{ CScriptTokenizer::TK_PR_FUNCTION, "func" },
	{ CScriptTokenizer::TK_PR_CLASS, "class" },
	{ CScriptTokenizer::TK_PR_CLASS_NAME, "class_name" },
	{ CScriptTokenizer::TK_PR_EXTENDS, "extends" },
	{ CScriptTokenizer::TK_PR_IS, "is" },
	{ CScriptTokenizer::TK_PR_ONREADY, "onready" },
	{ CScriptTokenizer::TK_PR_TOOL, "tool" },
	{ CScriptTokenizer::TK_PR_STATIC, "static" },
	{ CScriptTokenizer::TK_PR_EXPORT, "export" },
	{ CScriptTokenizer::TK_PR_SETGET, "setget" },
	{ CScriptTokenizer::TK_PR_VAR, "var" },
	{ CScriptTokenizer::TK_PR_AS, "as" },
	{ CScriptTokenizer::TK_PR_VOID, "void" },
	{ CScriptTokenizer::TK_PR_PRELOAD, "preload" },
	{ CScriptTokenizer::TK_PR_ASSERT, "assert" },
	{ CScriptTokenizer::TK_PR_SIGNAL, "signal" },
	{ CScriptTokenizer::TK_PR_BREAKPOINT, "breakpoint" },
	{ CScriptTokenizer::TK_PR_CONST, "const" },
	{ CScriptTokenizer::TK_PR_ENUM, "enum" },

	// Control flow.
	{ CScriptTokenizer::TK_CF_IF, "if" },
	{ CScriptTokenizer::TK_CF_ELIF, "elif" },
	{ CScriptTokenizer::TK_CF_ELSE, "else" },
	{ CScriptTokenizer::TK_CF_FOR, "for" },
	{ CScriptTokenizer::TK_CF_WHILE, "while" },
	{ CScriptTokenizer::TK_CF_BREAK, "break" },
	{ CScriptTokenizer::TK_CF_CONTINUE, "continue" },
	{ CScriptTokenizer::TK_CF_RETURN, "return" },
	{ CScriptTokenizer::TK_CF_MATCH, "match" },
	{ CScriptTokenizer::TK_CF_PASS, "pass" },

	// Self, named constants and the wildcard.
	{ CScriptTokenizer::TK_SELF, "self" },
	{ CScriptTokenizer::TK_CONST_PI, "PI" },
	{ CScriptTokenizer::TK_CONST_TAU, "TAU" },
	{ CScriptTokenizer::TK_WILDCARD, "_" },
	{ CScriptTokenizer::TK_CONST_INF, "INF" },
	{ CScriptTokenizer::TK_CONST_NAN, "NAN" },

	// Terminator.
	{ CScriptTokenizer::TK_ERROR, nullptr }
};
|
|
|
|
// Returns the display name stored in token_names for p_token.
// A value outside [0, TK_MAX) reports an error and yields "<error>".
const char *CScriptTokenizer::get_token_name(Token p_token) {
	ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");

	const char *name = token_names[p_token];
	return name;
}
|
|
|
|
bool CScriptTokenizer::is_token_literal(int p_offset, bool variable_safe) const {
|
|
switch (get_token(p_offset)) {
|
|
// Can always be literal:
|
|
case TK_IDENTIFIER:
|
|
|
|
case TK_PR_ONREADY:
|
|
case TK_PR_TOOL:
|
|
case TK_PR_STATIC:
|
|
case TK_PR_EXPORT:
|
|
case TK_PR_SETGET:
|
|
case TK_PR_SIGNAL:
|
|
return true;
|
|
|
|
// Literal for non-variables only:
|
|
case TK_BUILT_IN_TYPE:
|
|
case TK_BUILT_IN_FUNC:
|
|
|
|
case TK_OP_IN:
|
|
//case TK_OP_NOT:
|
|
//case TK_OP_OR:
|
|
//case TK_OP_AND:
|
|
|
|
case TK_PR_CLASS:
|
|
case TK_PR_CONST:
|
|
case TK_PR_ENUM:
|
|
case TK_PR_PRELOAD:
|
|
case TK_PR_FUNCTION:
|
|
case TK_PR_EXTENDS:
|
|
case TK_PR_ASSERT:
|
|
case TK_PR_VAR:
|
|
|
|
case TK_CF_IF:
|
|
case TK_CF_ELIF:
|
|
case TK_CF_ELSE:
|
|
case TK_CF_FOR:
|
|
case TK_CF_WHILE:
|
|
case TK_CF_BREAK:
|
|
case TK_CF_CONTINUE:
|
|
case TK_CF_RETURN:
|
|
case TK_CF_MATCH:
|
|
case TK_CF_PASS:
|
|
case TK_SELF:
|
|
case TK_CONST_PI:
|
|
case TK_CONST_TAU:
|
|
case TK_WILDCARD:
|
|
case TK_CONST_INF:
|
|
case TK_CONST_NAN:
|
|
case TK_ERROR:
|
|
return !variable_safe;
|
|
|
|
case TK_CONSTANT: {
|
|
switch (get_token_constant(p_offset).get_type()) {
|
|
case Variant::NIL:
|
|
case Variant::BOOL:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Returns the source spelling of the token at p_offset:
// - identifiers return their own name;
// - built-in types and keywords are looked up in the static tables;
// - built-in functions use CScriptFunctions::get_func_name();
// - constants of type NIL/BOOL return "null"/"true"/"false".
// Any token without a spelling reports an error and returns an empty name.
StringName CScriptTokenizer::get_token_literal(int p_offset) const {
	const Token token = get_token(p_offset);

	switch (token) {
		case TK_IDENTIFIER: {
			return get_token_identifier(p_offset);
		}

		case TK_BUILT_IN_FUNC: {
			return CScriptFunctions::get_func_name(get_token_built_in_func(p_offset));
		}

		case TK_BUILT_IN_TYPE: {
			const Variant::Type wanted = get_token_type(p_offset);
			for (int i = 0; _type_list[i].text; i++) {
				if (_type_list[i].type == wanted) {
					return _type_list[i].text;
				}
			}
			// Not in the table: report the failure below.
			break;
		}

		case TK_CONSTANT: {
			const Variant value = get_token_constant(p_offset);
			const Variant::Type value_type = value.get_type();

			if (value_type == Variant::NIL) {
				return "null";
			}
			if (value_type == Variant::BOOL) {
				return value ? "true" : "false";
			}
			// Other constant types have no literal spelling.
			break;
		}

		case TK_OP_AND:
		case TK_OP_OR: {
			// Don't get into default, since they can be non-literal.
			break;
		}

		default: {
			for (int i = 0; _keyword_list[i].text; i++) {
				if (_keyword_list[i].token == token) {
					return _keyword_list[i].text;
				}
			}
			break;
		}
	}

	ERR_FAIL_V_MSG("", "Failed to get token literal.");
}
|
|
|
|
// True for characters allowed in an identifier: ASCII letters, ASCII digits and '_'.
static bool _is_text_char(CharType c) {
	if (c == '_') {
		return true;
	}
	if (c >= '0' && c <= '9') {
		return true;
	}
	if (c >= 'A' && c <= 'Z') {
		return true;
	}
	return c >= 'a' && c <= 'z';
}
|
|
|
|
// True for the ASCII decimal digits '0'..'9'.
static bool _is_number(CharType c) {
	const bool below_range = c < '0';
	const bool above_range = c > '9';
	return !(below_range || above_range);
}
|
|
|
|
// True for ASCII hexadecimal digits: '0'..'9', 'a'..'f' and 'A'..'F'.
static bool _is_hex(CharType c) {
	if (c >= '0' && c <= '9') {
		return true;
	}
	if (c >= 'A' && c <= 'F') {
		return true;
	}
	return c >= 'a' && c <= 'f';
}
|
|
|
|
// True for the binary digits '0' and '1'.
static bool _is_bin(CharType c) {
	switch (c) {
		case '0':
		case '1':
			return true;
		default:
			return false;
	}
}
|
|
|
|
// Stores a token of type p_type at the current ring buffer slot, stamped with
// the current line and column, then moves the write position to the next slot.
void CScriptTokenizerText::_make_token(Token p_type) {
	TokenData &slot = tk_rb[tk_rb_pos];

	slot.line = line;
	slot.col = column;
	slot.type = p_type;

	// Wrap around at the end of the ring buffer.
	tk_rb_pos = (1 + tk_rb_pos) % TK_RB_SIZE;
}
|
|
void CScriptTokenizerText::_make_identifier(const StringName &p_identifier) {
|
|
TokenData &tk = tk_rb[tk_rb_pos];
|
|
|
|
tk.type = TK_IDENTIFIER;
|
|
tk.identifier = p_identifier;
|
|
tk.line = line;
|
|
tk.col = column;
|
|
|
|
tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
|
|
}
|
|
|
|
// Stores a TK_BUILT_IN_FUNC token carrying p_func at the current ring buffer
// slot, stamped with the current line and column, then advances the write position.
void CScriptTokenizerText::_make_built_in_func(CScriptFunctions::Function p_func) {
	TokenData &slot = tk_rb[tk_rb_pos];

	slot.line = line;
	slot.col = column;
	slot.type = TK_BUILT_IN_FUNC;
	slot.func = p_func;

	// Wrap around at the end of the ring buffer.
	tk_rb_pos = (1 + tk_rb_pos) % TK_RB_SIZE;
}
|
|
void CScriptTokenizerText::_make_constant(const Variant &p_constant) {
|
|
TokenData &tk = tk_rb[tk_rb_pos];
|
|
|
|
tk.type = TK_CONSTANT;
|
|
tk.constant = p_constant;
|
|
tk.line = line;
|
|
tk.col = column;
|
|
|
|
tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
|
|
}
|
|
|
|
// Stores a TK_BUILT_IN_TYPE token carrying p_type at the current ring buffer
// slot, stamped with the current line and column, then advances the write position.
void CScriptTokenizerText::_make_type(const Variant::Type &p_type) {
	TokenData &slot = tk_rb[tk_rb_pos];

	slot.line = line;
	slot.col = column;
	slot.type = TK_BUILT_IN_TYPE;
	slot.vtype = p_type;

	// Wrap around at the end of the ring buffer.
	tk_rb_pos = (1 + tk_rb_pos) % TK_RB_SIZE;
}
|
|
|
|
void CScriptTokenizerText::_make_error(const String &p_error) {
|
|
error_flag = true;
|
|
last_error = p_error;
|
|
|
|
TokenData &tk = tk_rb[tk_rb_pos];
|
|
tk.type = TK_ERROR;
|
|
tk.constant = p_error;
|
|
tk.line = line;
|
|
tk.col = column;
|
|
tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
|
|
}
|
|
|
|
void CScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) {
|
|
TokenData &tk = tk_rb[tk_rb_pos];
|
|
tk.type = TK_NEWLINE;
|
|
tk.constant = Vector2(p_indentation, p_tabs);
|
|
tk.line = line;
|
|
tk.col = column;
|
|
tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
|
|
}
|
|
|
|
void CScriptTokenizerText::_advance() {
|
|
if (error_flag) {
|
|
//parser broke
|
|
_make_error(last_error);
|
|
return;
|
|
}
|
|
|
|
if (code_pos >= len) {
|
|
_make_token(TK_EOF);
|
|
return;
|
|
}
|
|
#define GETCHAR(m_ofs) ((m_ofs + code_pos) >= len ? 0 : _code[m_ofs + code_pos])
|
|
#define INCPOS(m_amount) \
|
|
{ \
|
|
code_pos += m_amount; \
|
|
column += m_amount; \
|
|
}
|
|
while (true) {
|
|
bool is_string_name = false;
|
|
StringMode string_mode = STRING_DOUBLE_QUOTE;
|
|
|
|
switch (GETCHAR(0)) {
|
|
case 0:
|
|
_make_token(TK_EOF);
|
|
break;
|
|
case '\\':
|
|
INCPOS(1);
|
|
if (GETCHAR(0) == '\r') {
|
|
INCPOS(1);
|
|
}
|
|
|
|
if (GETCHAR(0) != '\n') {
|
|
_make_error("Expected newline after '\\'.");
|
|
return;
|
|
}
|
|
|
|
INCPOS(1);
|
|
line++;
|
|
|
|
while (GETCHAR(0) == ' ' || GETCHAR(0) == '\t') {
|
|
INCPOS(1);
|
|
}
|
|
|
|
continue;
|
|
case '\t':
|
|
case '\r':
|
|
case ' ':
|
|
INCPOS(1);
|
|
continue;
|
|
case '#': { // line comment skip
|
|
#ifdef DEBUG_ENABLED
|
|
String comment;
|
|
#endif // DEBUG_ENABLED
|
|
while (GETCHAR(0) != '\n') {
|
|
#ifdef DEBUG_ENABLED
|
|
comment += GETCHAR(0);
|
|
#endif // DEBUG_ENABLED
|
|
code_pos++;
|
|
if (GETCHAR(0) == 0) { //end of file
|
|
//_make_error("Unterminated Comment");
|
|
_make_token(TK_EOF);
|
|
return;
|
|
}
|
|
}
|
|
#ifdef DEBUG_ENABLED
|
|
String comment_content = comment.trim_prefix("#").trim_prefix(" ");
|
|
if (comment_content.begins_with("warning-ignore:")) {
|
|
String code = comment_content.get_slice(":", 1);
|
|
warning_skips.push_back(Pair<int, String>(line, code.strip_edges().to_lower()));
|
|
} else if (comment_content.begins_with("warning-ignore-all:")) {
|
|
String code = comment_content.get_slice(":", 1);
|
|
warning_global_skips.insert(code.strip_edges().to_lower());
|
|
} else if (comment_content.strip_edges() == "warnings-disable") {
|
|
ignore_warnings = true;
|
|
}
|
|
#endif // DEBUG_ENABLED
|
|
FALLTHROUGH;
|
|
}
|
|
case '\n': {
|
|
line++;
|
|
INCPOS(1);
|
|
bool used_spaces = false;
|
|
int tabs = 0;
|
|
column = 1;
|
|
int i = 0;
|
|
while (true) {
|
|
if (GETCHAR(i) == ' ') {
|
|
i++;
|
|
used_spaces = true;
|
|
} else if (GETCHAR(i) == '\t') {
|
|
if (used_spaces) {
|
|
_make_error("Spaces used before tabs on a line");
|
|
return;
|
|
}
|
|
i++;
|
|
tabs++;
|
|
} else {
|
|
break; // not indentation anymore
|
|
}
|
|
}
|
|
|
|
_make_newline(i, tabs);
|
|
return;
|
|
}
|
|
case '/': {
|
|
switch (GETCHAR(1)) {
|
|
case '=': { // diveq
|
|
|
|
_make_token(TK_OP_ASSIGN_DIV);
|
|
INCPOS(1);
|
|
|
|
} break;
|
|
default:
|
|
_make_token(TK_OP_DIV);
|
|
}
|
|
} break;
|
|
case '=': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_EQUAL);
|
|
INCPOS(1);
|
|
|
|
} else {
|
|
_make_token(TK_OP_ASSIGN);
|
|
}
|
|
|
|
} break;
|
|
case '<': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_LESS_EQUAL);
|
|
INCPOS(1);
|
|
} else if (GETCHAR(1) == '<') {
|
|
if (GETCHAR(2) == '=') {
|
|
_make_token(TK_OP_ASSIGN_SHIFT_LEFT);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_SHIFT_LEFT);
|
|
}
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_LESS);
|
|
}
|
|
|
|
} break;
|
|
case '>': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_GREATER_EQUAL);
|
|
INCPOS(1);
|
|
} else if (GETCHAR(1) == '>') {
|
|
if (GETCHAR(2) == '=') {
|
|
_make_token(TK_OP_ASSIGN_SHIFT_RIGHT);
|
|
INCPOS(1);
|
|
|
|
} else {
|
|
_make_token(TK_OP_SHIFT_RIGHT);
|
|
}
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_GREATER);
|
|
}
|
|
|
|
} break;
|
|
case '!': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_NOT_EQUAL);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_NOT);
|
|
}
|
|
|
|
} break;
|
|
//case '"' //string - no strings in shader
|
|
//case '\'' //string - no strings in shader
|
|
case '{':
|
|
_make_token(TK_CURLY_BRACKET_OPEN);
|
|
break;
|
|
case '}':
|
|
_make_token(TK_CURLY_BRACKET_CLOSE);
|
|
break;
|
|
case '[':
|
|
_make_token(TK_BRACKET_OPEN);
|
|
break;
|
|
case ']':
|
|
_make_token(TK_BRACKET_CLOSE);
|
|
break;
|
|
case '(':
|
|
_make_token(TK_PARENTHESIS_OPEN);
|
|
break;
|
|
case ')':
|
|
_make_token(TK_PARENTHESIS_CLOSE);
|
|
break;
|
|
case ',':
|
|
_make_token(TK_COMMA);
|
|
break;
|
|
case ';':
|
|
_make_token(TK_SEMICOLON);
|
|
break;
|
|
case '?':
|
|
_make_token(TK_QUESTION_MARK);
|
|
break;
|
|
case ':':
|
|
_make_token(TK_COLON); //for methods maybe but now useless.
|
|
break;
|
|
case '$':
|
|
_make_token(TK_DOLLAR); //for the get_node() shortener
|
|
break;
|
|
case '^': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_BIT_XOR);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_BIT_XOR);
|
|
}
|
|
|
|
} break;
|
|
case '~':
|
|
_make_token(TK_OP_BIT_INVERT);
|
|
break;
|
|
case '&': {
|
|
if (GETCHAR(1) == '&') {
|
|
_make_token(TK_OP_AND);
|
|
INCPOS(1);
|
|
} else if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_BIT_AND);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_BIT_AND);
|
|
}
|
|
} break;
|
|
case '|': {
|
|
if (GETCHAR(1) == '|') {
|
|
_make_token(TK_OP_OR);
|
|
INCPOS(1);
|
|
} else if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_BIT_OR);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_BIT_OR);
|
|
}
|
|
} break;
|
|
case '*': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_MUL);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_MUL);
|
|
}
|
|
} break;
|
|
case '+': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_ADD);
|
|
INCPOS(1);
|
|
/*
|
|
} else if (GETCHAR(1)=='+') {
|
|
_make_token(TK_OP_PLUS_PLUS);
|
|
INCPOS(1);
|
|
*/
|
|
} else {
|
|
_make_token(TK_OP_ADD);
|
|
}
|
|
|
|
} break;
|
|
case '-': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_SUB);
|
|
INCPOS(1);
|
|
} else if (GETCHAR(1) == '>') {
|
|
_make_token(TK_FORWARD_ARROW);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_SUB);
|
|
}
|
|
} break;
|
|
case '%': {
|
|
if (GETCHAR(1) == '=') {
|
|
_make_token(TK_OP_ASSIGN_MOD);
|
|
INCPOS(1);
|
|
} else {
|
|
_make_token(TK_OP_MOD);
|
|
}
|
|
} break;
|
|
case '@':
|
|
if (CharType(GETCHAR(1)) != '"' && CharType(GETCHAR(1)) != '\'') {
|
|
_make_error("Unexpected '@'");
|
|
return;
|
|
}
|
|
INCPOS(1);
|
|
is_string_name = true;
|
|
FALLTHROUGH;
|
|
case '\'':
|
|
case '"': {
|
|
if (GETCHAR(0) == '\'') {
|
|
string_mode = STRING_SINGLE_QUOTE;
|
|
}
|
|
|
|
int i = 1;
|
|
if (string_mode == STRING_DOUBLE_QUOTE && GETCHAR(i) == '"' && GETCHAR(i + 1) == '"') {
|
|
i += 2;
|
|
string_mode = STRING_MULTILINE;
|
|
}
|
|
|
|
String str;
|
|
while (true) {
|
|
if (CharType(GETCHAR(i)) == 0) {
|
|
_make_error("Unterminated String");
|
|
return;
|
|
} else if (string_mode == STRING_DOUBLE_QUOTE && CharType(GETCHAR(i)) == '"') {
|
|
break;
|
|
} else if (string_mode == STRING_SINGLE_QUOTE && CharType(GETCHAR(i)) == '\'') {
|
|
break;
|
|
} else if (string_mode == STRING_MULTILINE && CharType(GETCHAR(i)) == '\"' && CharType(GETCHAR(i + 1)) == '\"' && CharType(GETCHAR(i + 2)) == '\"') {
|
|
i += 2;
|
|
break;
|
|
} else if (string_mode != STRING_MULTILINE && CharType(GETCHAR(i)) == '\n') {
|
|
_make_error("Unexpected EOL at String.");
|
|
return;
|
|
} else if (CharType(GETCHAR(i)) == 0xFFFF) {
|
|
//string ends here, next will be TK
|
|
i--;
|
|
break;
|
|
} else if (CharType(GETCHAR(i)) == '\\') {
|
|
//escaped characters...
|
|
i++;
|
|
CharType next = GETCHAR(i);
|
|
if (next == 0) {
|
|
_make_error("Unterminated String");
|
|
return;
|
|
}
|
|
CharType res = 0;
|
|
|
|
switch (next) {
|
|
case 'a':
|
|
res = 7;
|
|
break;
|
|
case 'b':
|
|
res = 8;
|
|
break;
|
|
case 't':
|
|
res = 9;
|
|
break;
|
|
case 'n':
|
|
res = 10;
|
|
break;
|
|
case 'v':
|
|
res = 11;
|
|
break;
|
|
case 'f':
|
|
res = 12;
|
|
break;
|
|
case 'r':
|
|
res = 13;
|
|
break;
|
|
case '\'':
|
|
res = '\'';
|
|
break;
|
|
case '\"':
|
|
res = '\"';
|
|
break;
|
|
case '\\':
|
|
res = '\\';
|
|
break;
|
|
case '/':
|
|
res = '/';
|
|
break; //wtf
|
|
|
|
case 'u': {
|
|
//hexnumbarh - oct is deprecated
|
|
i += 1;
|
|
for (int j = 0; j < 4; j++) {
|
|
CharType c = GETCHAR(i + j);
|
|
if (c == 0) {
|
|
_make_error("Unterminated String");
|
|
return;
|
|
}
|
|
|
|
CharType v = 0;
|
|
if (c >= '0' && c <= '9') {
|
|
v = c - '0';
|
|
} else if (c >= 'a' && c <= 'f') {
|
|
v = c - 'a';
|
|
v += 10;
|
|
} else if (c >= 'A' && c <= 'F') {
|
|
v = c - 'A';
|
|
v += 10;
|
|
} else {
|
|
_make_error("Malformed hex constant in string");
|
|
return;
|
|
}
|
|
|
|
res <<= 4;
|
|
res |= v;
|
|
}
|
|
i += 3;
|
|
|
|
} break;
|
|
default: {
|
|
_make_error("Invalid escape sequence");
|
|
return;
|
|
} break;
|
|
}
|
|
|
|
str += res;
|
|
|
|
} else {
|
|
if (CharType(GETCHAR(i)) == '\n') {
|
|
line++;
|
|
column = 1;
|
|
}
|
|
|
|
str += CharType(GETCHAR(i));
|
|
}
|
|
i++;
|
|
}
|
|
INCPOS(i);
|
|
|
|
if (is_string_name) {
|
|
_make_constant(StringName(str));
|
|
} else {
|
|
_make_constant(str);
|
|
}
|
|
|
|
} break;
|
|
case 0xFFFF: {
|
|
_make_token(TK_CURSOR);
|
|
} break;
|
|
default: {
|
|
if (_is_number(GETCHAR(0)) || (GETCHAR(0) == '.' && _is_number(GETCHAR(1)))) {
|
|
// parse number
|
|
bool period_found = false;
|
|
bool exponent_found = false;
|
|
bool hexa_found = false;
|
|
bool bin_found = false;
|
|
bool sign_found = false;
|
|
|
|
String str;
|
|
int i = 0;
|
|
|
|
while (true) {
|
|
if (GETCHAR(i) == '.') {
|
|
if (period_found || exponent_found) {
|
|
_make_error("Invalid numeric constant at '.'");
|
|
return;
|
|
} else if (bin_found) {
|
|
_make_error("Invalid binary constant at '.'");
|
|
return;
|
|
} else if (hexa_found) {
|
|
_make_error("Invalid hexadecimal constant at '.'");
|
|
return;
|
|
}
|
|
period_found = true;
|
|
} else if (GETCHAR(i) == 'x') {
|
|
if (hexa_found || bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
|
|
_make_error("Invalid numeric constant at 'x'");
|
|
return;
|
|
}
|
|
hexa_found = true;
|
|
} else if (hexa_found && _is_hex(GETCHAR(i))) {
|
|
} else if (!hexa_found && GETCHAR(i) == 'b') {
|
|
if (bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
|
|
_make_error("Invalid numeric constant at 'b'");
|
|
return;
|
|
}
|
|
bin_found = true;
|
|
} else if (!hexa_found && GETCHAR(i) == 'e') {
|
|
if (exponent_found || bin_found) {
|
|
_make_error("Invalid numeric constant at 'e'");
|
|
return;
|
|
}
|
|
exponent_found = true;
|
|
} else if (_is_number(GETCHAR(i))) {
|
|
//all ok
|
|
|
|
} else if (bin_found && _is_bin(GETCHAR(i))) {
|
|
} else if ((GETCHAR(i) == '-' || GETCHAR(i) == '+') && exponent_found) {
|
|
if (sign_found) {
|
|
_make_error("Invalid numeric constant at '-'");
|
|
return;
|
|
}
|
|
sign_found = true;
|
|
} else if (GETCHAR(i) == '_') {
|
|
i++;
|
|
continue; // Included for readability, shouldn't be a part of the string
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
str += CharType(GETCHAR(i));
|
|
i++;
|
|
}
|
|
|
|
if (!(_is_number(str[str.length() - 1]) || (hexa_found && _is_hex(str[str.length() - 1])))) {
|
|
_make_error("Invalid numeric constant: " + str);
|
|
return;
|
|
}
|
|
|
|
INCPOS(i);
|
|
if (hexa_found) {
|
|
int64_t val = str.hex_to_int64();
|
|
_make_constant(val);
|
|
} else if (bin_found) {
|
|
int64_t val = str.bin_to_int64();
|
|
_make_constant(val);
|
|
} else if (period_found || exponent_found) {
|
|
double val = str.to_double();
|
|
_make_constant(val);
|
|
} else {
|
|
int64_t val = str.to_int64();
|
|
_make_constant(val);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (GETCHAR(0) == '.') {
|
|
//parse period
|
|
_make_token(TK_PERIOD);
|
|
break;
|
|
}
|
|
|
|
if (_is_text_char(GETCHAR(0))) {
|
|
// parse identifier
|
|
String str;
|
|
str += CharType(GETCHAR(0));
|
|
|
|
int i = 1;
|
|
while (_is_text_char(GETCHAR(i))) {
|
|
str += CharType(GETCHAR(i));
|
|
i++;
|
|
}
|
|
|
|
bool identifier = false;
|
|
|
|
if (str == "null") {
|
|
_make_constant(Variant());
|
|
|
|
} else if (str == "true") {
|
|
_make_constant(true);
|
|
|
|
} else if (str == "false") {
|
|
_make_constant(false);
|
|
} else {
|
|
bool found = false;
|
|
|
|
{
|
|
int idx = 0;
|
|
|
|
while (_type_list[idx].text) {
|
|
if (str == _type_list[idx].text) {
|
|
_make_type(_type_list[idx].type);
|
|
found = true;
|
|
break;
|
|
}
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
//built in func?
|
|
|
|
for (int j = 0; j < CScriptFunctions::FUNC_MAX; j++) {
|
|
if (str == CScriptFunctions::get_func_name(CScriptFunctions::Function(j))) {
|
|
_make_built_in_func(CScriptFunctions::Function(j));
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
//keyword
|
|
|
|
int idx = 0;
|
|
found = false;
|
|
|
|
while (_keyword_list[idx].text) {
|
|
if (str == _keyword_list[idx].text) {
|
|
_make_token(_keyword_list[idx].token);
|
|
found = true;
|
|
break;
|
|
}
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
identifier = true;
|
|
}
|
|
}
|
|
|
|
if (identifier) {
|
|
_make_identifier(str);
|
|
}
|
|
INCPOS(str.length());
|
|
return;
|
|
}
|
|
|
|
_make_error("Unknown character");
|
|
return;
|
|
|
|
} break;
|
|
}
|
|
|
|
INCPOS(1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void CScriptTokenizerText::set_code(const String &p_code) {
|
|
code = p_code;
|
|
len = p_code.length();
|
|
if (len) {
|
|
_code = &code[0];
|
|
} else {
|
|
_code = nullptr;
|
|
}
|
|
code_pos = 0;
|
|
line = 1; //it is stand-ar-ized that lines begin in 1 in code..
|
|
column = 1; //the same holds for columns
|
|
tk_rb_pos = 0;
|
|
error_flag = false;
|
|
#ifdef DEBUG_ENABLED
|
|
ignore_warnings = false;
|
|
#endif // DEBUG_ENABLED
|
|
last_error = "";
|
|
for (int i = 0; i < MAX_LOOKAHEAD + 1; i++) {
|
|
_advance();
|
|
}
|
|
}
|
|
|
|
// Returns the type of the token at p_offset relative to the current token.
// p_offset must lie strictly between -MAX_LOOKAHEAD and MAX_LOOKAHEAD;
// otherwise an error is reported and TK_ERROR is returned.
CScriptTokenizerText::Token CScriptTokenizerText::get_token(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, TK_ERROR);
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, TK_ERROR);

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	return tk_rb[ofs].type;
}
|
|
|
|
int CScriptTokenizerText::get_token_line(int p_offset) const {
|
|
ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
|
|
ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
|
|
|
|
int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
|
|
return tk_rb[ofs].line;
|
|
}
|
|
|
|
int CScriptTokenizerText::get_token_column(int p_offset) const {
|
|
ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
|
|
ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
|
|
|
|
int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
|
|
return tk_rb[ofs].col;
|
|
}
|
|
|
|
// Returns the value of the TK_CONSTANT token at p_offset relative to the current token.
// If p_offset is out of range or the token is not a constant, an error is
// reported and the constant stored in ring buffer slot 0 is returned instead.
const Variant &CScriptTokenizerText::get_token_constant(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, tk_rb[0].constant);
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, tk_rb[0].constant);

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_CONSTANT, tk_rb[0].constant);

	return tk_rb[ofs].constant;
}
|
|
|
|
// Returns the name of the TK_IDENTIFIER token at p_offset relative to the current token.
// If p_offset is out of range or the token is not an identifier, an error is
// reported and an empty StringName is returned.
StringName CScriptTokenizerText::get_token_identifier(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, StringName());
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, StringName());

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_IDENTIFIER, StringName());

	return tk_rb[ofs].identifier;
}
|
|
|
|
// Returns the function id of the TK_BUILT_IN_FUNC token at p_offset relative to the current token.
// If p_offset is out of range or the token is not a built-in function, an
// error is reported and CScriptFunctions::FUNC_MAX is returned.
CScriptFunctions::Function CScriptTokenizerText::get_token_built_in_func(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, CScriptFunctions::FUNC_MAX);
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, CScriptFunctions::FUNC_MAX);

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_FUNC, CScriptFunctions::FUNC_MAX);

	return tk_rb[ofs].func;
}
|
|
|
|
// Returns the Variant type of the TK_BUILT_IN_TYPE token at p_offset relative to the current token.
// If p_offset is out of range or the token is not a built-in type, an error
// is reported and Variant::NIL is returned.
Variant::Type CScriptTokenizerText::get_token_type(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, Variant::NIL);
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, Variant::NIL);

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_TYPE, Variant::NIL);

	return tk_rb[ofs].vtype;
}
|
|
|
|
int CScriptTokenizerText::get_token_line_indent(int p_offset) const {
|
|
ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
|
|
ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
|
|
|
|
int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
|
|
ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
|
|
return tk_rb[ofs].constant.operator Vector2().x;
|
|
}
|
|
|
|
int CScriptTokenizerText::get_token_line_tab_indent(int p_offset) const {
|
|
ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
|
|
ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
|
|
|
|
int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
|
|
ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
|
|
return tk_rb[ofs].constant.operator Vector2().y;
|
|
}
|
|
|
|
// Returns the message stored in the TK_ERROR token at p_offset relative to the current token.
// If p_offset is out of range or the token is not an error, an error is
// reported and an empty String is returned.
String CScriptTokenizerText::get_token_error(int p_offset) const {
	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, String());
	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, String());

	// Translate the relative offset into a ring buffer index.
	const int ofs = (tk_rb_pos + p_offset + TK_RB_SIZE - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_ERROR, String());

	return tk_rb[ofs].constant;
}
|
|
|
|
void CScriptTokenizerText::advance(int p_amount) {
|
|
ERR_FAIL_COND(p_amount <= 0);
|
|
for (int i = 0; i < p_amount; i++) {
|
|
_advance();
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#define BYTECODE_VERSION 13
|
|
|
|
// Loads a compiled token buffer.
//
// Layout read by this function:
//   bytes 0-3    magic "GDSC"
//   bytes 4-7    bytecode version (must not exceed BYTECODE_VERSION)
//   bytes 8-23   identifier, constant, line and token counts (uint32 each)
//   identifiers  uint32 byte length + that many bytes XOR-ed with 0xb6 (UTF-8)
//   constants    encoded Variants, read with decode_variant() (objects disabled)
//   lines        pairs of uint32 (token index, line/column value)
//   tokens       1 byte each, or 4 bytes when the first byte has TOKEN_BYTE_MASK set
//
// The buffer is treated as untrusted: every count and length is validated
// against the bytes still available before it is used.
//
// Returns OK on success, ERR_INVALID_DATA for a malformed or too recent
// buffer, or the error returned by decode_variant(). On failure the members
// may have been partially updated.
Error CScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
	const uint8_t *buf = p_buffer.ptr();
	int total_len = p_buffer.size();
	ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);

	// Kept unsigned so a value with the top bit set cannot pass as a negative version.
	const uint32_t version = decode_uint32(&buf[4]);
	ERR_FAIL_COND_V_MSG(version > BYTECODE_VERSION, ERR_INVALID_DATA, "Bytecode is too recent! Please use a newer engine version.");

	const int identifier_count = decode_uint32(&buf[8]);
	const int constant_count = decode_uint32(&buf[12]);
	const int line_count = decode_uint32(&buf[16]);
	const int token_count = decode_uint32(&buf[20]);

	const uint8_t *b = &buf[24];
	total_len -= 24;

	// Every identifier needs at least its 4-byte length prefix, which bounds
	// the count before any memory is reserved for it.
	ERR_FAIL_COND_V(identifier_count < 0 || identifier_count > total_len / 4, ERR_INVALID_DATA);

	identifiers.resize(identifier_count);
	for (int i = 0; i < identifier_count; i++) {
		// The length prefix itself must be inside the buffer.
		ERR_FAIL_COND_V(total_len < 4, ERR_INVALID_DATA);
		const int len = decode_uint32(b);
		// The payload must fit in what remains after the prefix. A zero or
		// negative length is rejected because the last payload byte is
		// overwritten with a terminator below.
		ERR_FAIL_COND_V(len <= 0 || len > total_len - 4, ERR_INVALID_DATA);
		b += 4;

		Vector<uint8_t> cs;
		cs.resize(len);
		for (int j = 0; j < len; j++) {
			cs.write[j] = b[j] ^ 0xb6;
		}

		// Force termination so parse_utf8() cannot read past the copy.
		cs.write[len - 1] = 0;
		String s;
		s.parse_utf8((const char *)cs.ptr());
		b += len;
		total_len -= len + 4;
		identifiers.write[i] = s;
	}

	// NOTE(review): assumes every encoded Variant occupies at least 4 bytes
	// (its type header), as described by the engine's binary serialization
	// format; confirm against decode_variant().
	ERR_FAIL_COND_V(constant_count < 0 || constant_count > total_len / 4, ERR_INVALID_DATA);

	constants.resize(constant_count);
	for (int i = 0; i < constant_count; i++) {
		Variant v;
		int len;
		// An object cannot be constant, never decode objects
		Error err = decode_variant(v, b, total_len, &len, false);
		if (err) {
			return err;
		}
		b += len;
		total_len -= len;
		constants.write[i] = v;
	}

	// Division instead of `line_count * 8` so a huge count cannot overflow.
	ERR_FAIL_COND_V(line_count < 0 || line_count > total_len / 8, ERR_INVALID_DATA);

	for (int i = 0; i < line_count; i++) {
		const uint32_t token_index = decode_uint32(b);
		b += 4;
		const uint32_t linecol = decode_uint32(b);
		b += 4;

		lines.insert(token_index, linecol);
		total_len -= 8;
	}

	// Every token takes at least one byte.
	ERR_FAIL_COND_V(token_count < 0 || token_count > total_len, ERR_INVALID_DATA);

	tokens.resize(token_count);

	for (int i = 0; i < token_count; i++) {
		ERR_FAIL_COND_V(total_len < 1, ERR_INVALID_DATA);

		if ((*b) & TOKEN_BYTE_MASK) { //little endian always
			ERR_FAIL_COND_V(total_len < 4, ERR_INVALID_DATA);

			tokens.write[i] = decode_uint32(b) & ~TOKEN_BYTE_MASK;
			b += 4;
			// Account for the four bytes consumed, otherwise later bounds
			// checks would allow reads past the end of the buffer.
			total_len -= 4;
		} else {
			tokens.write[i] = *b;
			b += 1;
			total_len--;
		}
	}

	// Start reading from the first token.
	token = 0;

	return OK;
}
|
|
|
|
// Tokenizes p_code with CScriptTokenizerText and serializes the resulting
// token stream into a byte buffer. Returns an empty Vector when the tokenizer
// reports TK_ERROR or when a constant cannot be encoded.
//
// Sections, in the order they are written:
//  - Header (24 bytes): the characters "GDSC", BYTECODE_VERSION, then the
//    identifier, constant, line and token counts, each as a uint32.
//  - Identifiers: a uint32 byte length followed by the NUL-terminated UTF-8
//    name, padded to a multiple of 4 bytes, every byte XORed with 0xb6.
//  - Constants: the output of encode_variant() with object encoding disabled.
//  - Lines: uint32 pairs of (index of the first token seen on a line, the
//    value get_token_line() returned for it).
//  - Tokens: a single byte each, or four bytes OR-ed with TOKEN_BYTE_MASK when
//    the token carries bits outside TOKEN_MASK.
Vector<uint8_t> CScriptTokenizerBuffer::parse_code_string(const String &p_code) {
	// Tables filled during the scan; each maps a value to its serialized index.
	Map<StringName, int> identifier_map;
	HashMap<Variant, int, VariantHasher, VariantComparator> constant_map;
	Map<uint32_t, int> line_map;
	Vector<uint32_t> token_array;

	CScriptTokenizerText tt;
	tt.set_code(p_code);

	int current_line = -1;
	for (;;) {
		// Record the index of the first token whenever the reported line changes.
		const int token_line = tt.get_token_line();
		if (token_line != current_line) {
			current_line = token_line;
			line_map[current_line] = token_array.size();
		}

		// Token kind in the low bits; kinds with extra data store it above TOKEN_BITS.
		uint32_t packed = tt.get_token();
		switch (tt.get_token()) {
			case TK_IDENTIFIER: {
				const StringName name = tt.get_token_identifier();
				if (!identifier_map.has(name)) {
					// Read the size first so the new entry gets the next free index.
					const int next_index = identifier_map.size();
					identifier_map[name] = next_index;
				}
				packed |= identifier_map[name] << TOKEN_BITS;
			} break;
			case TK_CONSTANT: {
				const Variant &value = tt.get_token_constant();
				if (!constant_map.has(value)) {
					const int next_index = constant_map.size();
					constant_map[value] = next_index;
				}
				packed |= constant_map[value] << TOKEN_BITS;
			} break;
			case TK_BUILT_IN_TYPE: {
				packed |= tt.get_token_type() << TOKEN_BITS;
			} break;
			case TK_BUILT_IN_FUNC: {
				packed |= tt.get_token_built_in_func() << TOKEN_BITS;
			} break;
			case TK_NEWLINE: {
				packed |= tt.get_token_line_indent() << TOKEN_BITS;
			} break;
			case TK_ERROR: {
				ERR_FAIL_V(Vector<uint8_t>());
			} break;
			default: {
			}
		};

		token_array.push_back(packed);

		// The EOF token itself is stored before the scan stops.
		if (tt.get_token() == TK_EOF) {
			break;
		}
		tt.advance();
	}

	// Invert the tables so they can be walked in index order when writing.
	Map<int, StringName> rev_identifier_map;
	{
		Map<StringName, int>::Element *entry = identifier_map.front();
		while (entry) {
			rev_identifier_map[entry->get()] = entry->key();
			entry = entry->next();
		}
	}

	Map<int, Variant> rev_constant_map;
	for (const Variant *key = constant_map.next(nullptr); key; key = constant_map.next(key)) {
		rev_constant_map[constant_map[*key]] = *key;
	}

	Map<int, uint32_t> rev_line_map;
	{
		Map<uint32_t, int>::Element *entry = line_map.front();
		while (entry) {
			rev_line_map[entry->get()] = entry->key();
			entry = entry->next();
		}
	}

	// Header.
	Vector<uint8_t> buf;
	buf.resize(24);
	static const char magic[4] = { 'G', 'D', 'S', 'C' };
	for (int i = 0; i < 4; i++) {
		buf.write[i] = magic[i];
	}
	encode_uint32(BYTECODE_VERSION, &buf.write[4]);
	encode_uint32(identifier_map.size(), &buf.write[8]);
	encode_uint32(constant_map.size(), &buf.write[12]);
	encode_uint32(line_map.size(), &buf.write[16]);
	encode_uint32(token_array.size(), &buf.write[20]);

	// Identifiers.
	for (Map<int, StringName>::Element *entry = rev_identifier_map.front(); entry; entry = entry->next()) {
		CharString utf8_name = String(entry->get()).utf8();
		const int len = utf8_name.length() + 1; // Includes the terminating NUL.
		const int padding = (4 - (len % 4)) % 4; // Rounds the record up to 4 bytes.

		uint8_t size_bytes[4];
		encode_uint32(len + padding, size_bytes);
		for (int i = 0; i < 4; i++) {
			buf.push_back(size_bytes[i]);
		}
		for (int i = 0; i < len; i++) {
			buf.push_back(utf8_name[i] ^ 0xb6);
		}
		for (int i = 0; i < padding; i++) {
			buf.push_back(0 ^ 0xb6);
		}
	}

	// Constants. Objects cannot be constant, never encode objects.
	for (Map<int, Variant>::Element *entry = rev_constant_map.front(); entry; entry = entry->next()) {
		// First pass with a null destination only computes the encoded size.
		int len;
		Error err = encode_variant(entry->get(), nullptr, len, false);
		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");

		const int write_pos = buf.size();
		buf.resize(write_pos + len);
		encode_variant(entry->get(), &buf.write[write_pos], len, false);
	}

	// Line table.
	for (Map<int, uint32_t>::Element *entry = rev_line_map.front(); entry; entry = entry->next()) {
		uint8_t record[8];
		encode_uint32(entry->key(), &record[0]);
		encode_uint32(entry->get(), &record[4]);
		for (int i = 0; i < 8; i++) {
			buf.push_back(record[i]);
		}
	}

	// Tokens.
	for (int i = 0; i < token_array.size(); i++) {
		const uint32_t packed = token_array[i];

		if (!(packed & ~TOKEN_MASK)) {
			// Nothing above the kind bits: a single byte is enough.
			buf.push_back(packed);
			continue;
		}

		uint8_t wide[4];
		encode_uint32(packed | TOKEN_BYTE_MASK, &wide[0]);
		for (int j = 0; j < 4; j++) {
			buf.push_back(wide[j]);
		}
	}

	return buf;
}
|
|
|
|
// Returns the kind of the token p_offset positions away from the cursor.
// Positions outside the token array yield TK_EOF.
CScriptTokenizerBuffer::Token CScriptTokenizerBuffer::get_token(int p_offset) const {
	const int index = token + p_offset;
	const bool in_range = index >= 0 && index < tokens.size();

	// Only the bits covered by TOKEN_MASK identify the token kind.
	return in_range ? CScriptTokenizerBuffer::Token(tokens[index] & TOKEN_MASK) : TK_EOF;
}
|
|
|
|
// Returns the identifier name attached to the token p_offset positions away
// from the cursor. Fails with an empty StringName when either the token
// position or the stored identifier index is out of range.
StringName CScriptTokenizerBuffer::get_token_identifier(int p_offset) const {
	const int offset = token + p_offset;
	ERR_FAIL_INDEX_V(offset, tokens.size(), StringName());

	// Bits above TOKEN_BITS hold the index into the identifiers table.
	const uint32_t identifier = tokens[offset] >> TOKEN_BITS;
	ERR_FAIL_UNSIGNED_INDEX_V(identifier, (uint32_t)identifiers.size(), StringName());

	return identifiers[identifier];
}
|
|
|
|
// Returns the built-in function stored with the token p_offset positions away
// from the cursor.
//
// Fails with CScriptFunctions::FUNC_MAX when the token position is outside the
// token array, or when the stored function index is not below FUNC_MAX.
CScriptFunctions::Function CScriptTokenizerBuffer::get_token_built_in_func(int p_offset) const {
	int offset = token + p_offset;
	ERR_FAIL_INDEX_V(offset, tokens.size(), CScriptFunctions::FUNC_MAX);

	// The payload comes from a decoded buffer, so range-check it before casting
	// to the enum, the same way the identifier and constant accessors validate
	// their indices. NOTE(review): assumes FUNC_MAX is the one-past-last
	// enumerator of CScriptFunctions::Function - confirm against its declaration.
	uint32_t func = tokens[offset] >> TOKEN_BITS;
	ERR_FAIL_UNSIGNED_INDEX_V(func, (uint32_t)CScriptFunctions::FUNC_MAX, CScriptFunctions::FUNC_MAX);

	return CScriptFunctions::Function(func);
}
|
|
|
|
// Returns the Variant type stored with the token p_offset positions away from
// the cursor. Fails with Variant::NIL when the position is outside the token
// array. The stored value itself is cast without further validation.
Variant::Type CScriptTokenizerBuffer::get_token_type(int p_offset) const {
	const int offset = token + p_offset;
	ERR_FAIL_INDEX_V(offset, tokens.size(), Variant::NIL);

	// Bits above TOKEN_BITS hold the type value.
	const uint32_t type_bits = tokens[offset] >> TOKEN_BITS;
	return Variant::Type(type_bits);
}
|
|
|
|
int CScriptTokenizerBuffer::get_token_line(int p_offset) const {
|
|
int offset = token + p_offset;
|
|
int pos = lines.find_nearest(offset);
|
|
|
|
if (pos < 0) {
|
|
return -1;
|
|
}
|
|
if (pos >= lines.size()) {
|
|
pos = lines.size() - 1;
|
|
}
|
|
|
|
uint32_t l = lines.getv(pos);
|
|
return l & TOKEN_LINE_MASK;
|
|
}
|
|
int CScriptTokenizerBuffer::get_token_column(int p_offset) const {
|
|
int offset = token + p_offset;
|
|
int pos = lines.find_nearest(offset);
|
|
if (pos < 0) {
|
|
return -1;
|
|
}
|
|
if (pos >= lines.size()) {
|
|
pos = lines.size() - 1;
|
|
}
|
|
|
|
uint32_t l = lines.getv(pos);
|
|
return l >> TOKEN_LINE_BITS;
|
|
}
|
|
int CScriptTokenizerBuffer::get_token_line_indent(int p_offset) const {
|
|
int offset = token + p_offset;
|
|
ERR_FAIL_INDEX_V(offset, tokens.size(), 0);
|
|
return tokens[offset] >> TOKEN_BITS;
|
|
}
|
|
// Returns the constant attached to the token p_offset positions away from the
// cursor. Fails with the `nil` member when either the token position or the
// stored constant index is out of range.
const Variant &CScriptTokenizerBuffer::get_token_constant(int p_offset) const {
	const int offset = token + p_offset;
	ERR_FAIL_INDEX_V(offset, tokens.size(), nil);

	// Bits above TOKEN_BITS hold the index into the constants table.
	const uint32_t constant = tokens[offset] >> TOKEN_BITS;
	ERR_FAIL_UNSIGNED_INDEX_V(constant, (uint32_t)constants.size(), nil);

	return constants[constant];
}
|
|
// Always fails: this implementation ignores p_offset, reports an error through
// ERR_FAIL_V and returns an empty String.
String CScriptTokenizerBuffer::get_token_error(int p_offset) const {
	ERR_FAIL_V(String());
}
|
|
|
|
// Moves the cursor by p_amount tokens. The cursor is left untouched, and an
// error is reported, when the new position would fall outside the token array.
void CScriptTokenizerBuffer::advance(int p_amount) {
	ERR_FAIL_INDEX(p_amount + token, tokens.size());

	token = token + p_amount;
}
|
|
// Creates a tokenizer whose cursor points at the first token position.
CScriptTokenizerBuffer::CScriptTokenizerBuffer() {
	// Index of the current token within `tokens`.
	token = 0;
}
|