godot/modules/mono/editor/script_class_parser.cpp
Rémi Verschelde 87c80f529f
clang-format: Enable BreakBeforeTernaryOperators
clang-format keeps breaking the way it handles break *after* ternary operators,
so I give up and go with the only style they seem to actually test.
2021-10-28 14:50:33 +02:00

736 lines
18 KiB
C++

/*************************************************************************/
/* script_class_parser.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "script_class_parser.h"
#include "core/map.h"
#include "core/os/os.h"
#include "../utils/string_utils.h"
const char *ScriptClassParser::token_names[ScriptClassParser::TK_MAX] = {
"[",
"]",
"{",
"}",
".",
":",
",",
"Symbol",
"Identifier",
"String",
"Number",
"<",
">",
"EOF",
"Error"
};
String ScriptClassParser::get_token_name(ScriptClassParser::Token p_token) {
ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
return token_names[p_token];
}
ScriptClassParser::Token ScriptClassParser::get_token() {
while (true) {
switch (code[idx]) {
case '\n': {
line++;
idx++;
break;
};
case 0: {
return TK_EOF;
} break;
case '{': {
idx++;
return TK_CURLY_BRACKET_OPEN;
};
case '}': {
idx++;
return TK_CURLY_BRACKET_CLOSE;
};
case '[': {
idx++;
return TK_BRACKET_OPEN;
};
case ']': {
idx++;
return TK_BRACKET_CLOSE;
};
case '<': {
idx++;
return TK_OP_LESS;
};
case '>': {
idx++;
return TK_OP_GREATER;
};
case ':': {
idx++;
return TK_COLON;
};
case ',': {
idx++;
return TK_COMMA;
};
case '.': {
idx++;
return TK_PERIOD;
};
case '#': {
//compiler directive
while (code[idx] != '\n' && code[idx] != 0) {
idx++;
}
continue;
} break;
case '/': {
switch (code[idx + 1]) {
case '*': { // block comment
idx += 2;
while (true) {
if (code[idx] == 0) {
error_str = "Unterminated comment";
error = true;
return TK_ERROR;
} else if (code[idx] == '*' && code[idx + 1] == '/') {
idx += 2;
break;
} else if (code[idx] == '\n') {
line++;
}
idx++;
}
} break;
case '/': { // line comment skip
while (code[idx] != '\n' && code[idx] != 0) {
idx++;
}
} break;
default: {
value = "/";
idx++;
return TK_SYMBOL;
}
}
continue; // a comment
} break;
case '\'':
case '"': {
bool verbatim = idx != 0 && code[idx - 1] == '@';
CharType begin_str = code[idx];
idx++;
String tk_string = String();
while (true) {
if (code[idx] == 0) {
error_str = "Unterminated String";
error = true;
return TK_ERROR;
} else if (code[idx] == begin_str) {
if (verbatim && code[idx + 1] == '"') { // '""' is verbatim string's '\"'
idx += 2; // skip next '"' as well
continue;
}
idx += 1;
break;
} else if (code[idx] == '\\' && !verbatim) {
//escaped characters...
idx++;
CharType next = code[idx];
if (next == 0) {
error_str = "Unterminated String";
error = true;
return TK_ERROR;
}
CharType res = 0;
switch (next) {
case 'b':
res = 8;
break;
case 't':
res = 9;
break;
case 'n':
res = 10;
break;
case 'f':
res = 12;
break;
case 'r':
res = 13;
break;
case '\"':
res = '\"';
break;
case '\\':
res = '\\';
break;
default: {
res = next;
} break;
}
tk_string += res;
} else {
if (code[idx] == '\n')
line++;
tk_string += code[idx];
}
idx++;
}
value = tk_string;
return TK_STRING;
} break;
default: {
if (code[idx] <= 32) {
idx++;
break;
}
if ((code[idx] >= 33 && code[idx] <= 47) || (code[idx] >= 58 && code[idx] <= 63) || (code[idx] >= 91 && code[idx] <= 94) || code[idx] == 96 || (code[idx] >= 123 && code[idx] <= 127)) {
value = String::chr(code[idx]);
idx++;
return TK_SYMBOL;
}
if (code[idx] == '-' || (code[idx] >= '0' && code[idx] <= '9')) {
//a number
const CharType *rptr;
double number = String::to_double(&code[idx], &rptr);
idx += (rptr - &code[idx]);
value = number;
return TK_NUMBER;
} else if ((code[idx] == '@' && code[idx + 1] != '"') || code[idx] == '_' || (code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || code[idx] > 127) {
String id;
id += code[idx];
idx++;
while (code[idx] == '_' || (code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || (code[idx] >= '0' && code[idx] <= '9') || code[idx] > 127) {
id += code[idx];
idx++;
}
value = id;
return TK_IDENTIFIER;
} else if (code[idx] == '@' && code[idx + 1] == '"') {
// begin of verbatim string
idx++;
} else {
error_str = "Unexpected character.";
error = true;
return TK_ERROR;
}
}
}
}
}
Error ScriptClassParser::_skip_generic_type_params() {
Token tk;
while (true) {
tk = get_token();
if (tk == TK_IDENTIFIER) {
tk = get_token();
// Type specifications can end with "?" to denote nullable types, such as IList<int?>
if (tk == TK_SYMBOL) {
tk = get_token();
if (value.operator String() != "?") {
error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found unexpected symbol '" + value + "'";
error = true;
return ERR_PARSE_ERROR;
}
if (tk != TK_OP_GREATER && tk != TK_COMMA) {
error_str = "Nullable type symbol '?' is only allowed after an identifier, but found " + get_token_name(tk) + " next.";
error = true;
return ERR_PARSE_ERROR;
}
}
if (tk == TK_PERIOD) {
while (true) {
tk = get_token();
if (tk != TK_IDENTIFIER) {
error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
tk = get_token();
if (tk != TK_PERIOD)
break;
}
}
if (tk == TK_OP_LESS) {
Error err = _skip_generic_type_params();
if (err)
return err;
tk = get_token();
}
if (tk == TK_OP_GREATER) {
return OK;
} else if (tk != TK_COMMA) {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
} else if (tk == TK_OP_LESS) {
error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found " + get_token_name(TK_OP_LESS);
error = true;
return ERR_PARSE_ERROR;
} else if (tk == TK_OP_GREATER) {
return OK;
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
}
}
Error ScriptClassParser::_parse_type_full_name(String &r_full_name) {
Token tk = get_token();
if (tk != TK_IDENTIFIER) {
error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
r_full_name += String(value);
if (code[idx] == '<') {
idx++;
// We don't mind if the base is generic, but we skip it any ways since this information is not needed
Error err = _skip_generic_type_params();
if (err)
return err;
}
if (code[idx] != '.') // We only want to take the next token if it's a period
return OK;
tk = get_token();
CRASH_COND(tk != TK_PERIOD); // Assertion
r_full_name += ".";
return _parse_type_full_name(r_full_name);
}
Error ScriptClassParser::_parse_class_base(Vector<String> &r_base) {
String name;
Error err = _parse_type_full_name(name);
if (err)
return err;
Token tk = get_token();
if (tk == TK_COMMA) {
err = _parse_class_base(r_base);
if (err)
return err;
} else if (tk == TK_IDENTIFIER && String(value) == "where") {
err = _parse_type_constraints();
if (err) {
return err;
}
// An open curly bracket was parsed by _parse_type_constraints, so we can exit
} else if (tk == TK_CURLY_BRACKET_OPEN) {
// we are finished when we hit the open curly bracket
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
r_base.push_back(name);
return OK;
}
Error ScriptClassParser::_parse_type_constraints() {
Token tk = get_token();
if (tk != TK_IDENTIFIER) {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
tk = get_token();
if (tk != TK_COLON) {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
while (true) {
tk = get_token();
if (tk == TK_IDENTIFIER) {
if (String(value) == "where") {
return _parse_type_constraints();
}
tk = get_token();
if (tk == TK_PERIOD) {
while (true) {
tk = get_token();
if (tk != TK_IDENTIFIER) {
error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
tk = get_token();
if (tk != TK_PERIOD)
break;
}
}
}
if (tk == TK_COMMA) {
continue;
} else if (tk == TK_IDENTIFIER && String(value) == "where") {
return _parse_type_constraints();
} else if (tk == TK_SYMBOL && String(value) == "(") {
tk = get_token();
if (tk != TK_SYMBOL || String(value) != ")") {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
} else if (tk == TK_OP_LESS) {
Error err = _skip_generic_type_params();
if (err)
return err;
} else if (tk == TK_CURLY_BRACKET_OPEN) {
return OK;
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
}
}
Error ScriptClassParser::_parse_namespace_name(String &r_name, int &r_curly_stack) {
Token tk = get_token();
if (tk == TK_IDENTIFIER) {
r_name += String(value);
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
tk = get_token();
if (tk == TK_PERIOD) {
r_name += ".";
return _parse_namespace_name(r_name, r_curly_stack);
} else if (tk == TK_CURLY_BRACKET_OPEN) {
r_curly_stack++;
return OK;
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
}
Error ScriptClassParser::parse(const String &p_code) {
code = p_code;
idx = 0;
line = 0;
error_str = String();
error = false;
value = Variant();
classes.clear();
Token tk = get_token();
Map<int, NameDecl> name_stack;
int curly_stack = 0;
int type_curly_stack = 0;
while (!error && tk != TK_EOF) {
String identifier = value;
if (tk == TK_IDENTIFIER && (identifier == "class" || identifier == "struct")) {
bool is_class = identifier == "class";
tk = get_token();
if (tk == TK_IDENTIFIER) {
String name = value;
int at_level = curly_stack;
ClassDecl class_decl;
for (Map<int, NameDecl>::Element *E = name_stack.front(); E; E = E->next()) {
const NameDecl &name_decl = E->value();
if (name_decl.type == NameDecl::NAMESPACE_DECL) {
if (E != name_stack.front())
class_decl.namespace_ += ".";
class_decl.namespace_ += name_decl.name;
} else {
class_decl.name += name_decl.name + ".";
}
}
class_decl.name += name;
class_decl.nested = type_curly_stack > 0;
bool generic = false;
while (true) {
tk = get_token();
if (tk == TK_COLON) {
Error err = _parse_class_base(class_decl.base);
if (err)
return err;
curly_stack++;
type_curly_stack++;
break;
} else if (tk == TK_CURLY_BRACKET_OPEN) {
curly_stack++;
type_curly_stack++;
break;
} else if (tk == TK_OP_LESS && !generic) {
generic = true;
Error err = _skip_generic_type_params();
if (err)
return err;
} else if (tk == TK_IDENTIFIER && String(value) == "where") {
Error err = _parse_type_constraints();
if (err) {
return err;
}
// An open curly bracket was parsed by _parse_type_constraints, so we can exit
curly_stack++;
type_curly_stack++;
break;
} else {
error_str = "Unexpected token: " + get_token_name(tk);
error = true;
return ERR_PARSE_ERROR;
}
}
NameDecl name_decl;
name_decl.name = name;
name_decl.type = is_class ? NameDecl::CLASS_DECL : NameDecl::STRUCT_DECL;
name_stack[at_level] = name_decl;
if (is_class) {
if (!generic) { // no generics, thanks
classes.push_back(class_decl);
} else if (OS::get_singleton()->is_stdout_verbose()) {
String full_name = class_decl.namespace_;
if (full_name.length())
full_name += ".";
full_name += class_decl.name;
OS::get_singleton()->print("Ignoring generic class declaration: %s\n", full_name.utf8().get_data());
}
}
}
} else if (tk == TK_IDENTIFIER && identifier == "namespace") {
if (type_curly_stack > 0) {
error_str = "Found namespace nested inside type.";
error = true;
return ERR_PARSE_ERROR;
}
String name;
int at_level = curly_stack;
Error err = _parse_namespace_name(name, curly_stack);
if (err)
return err;
NameDecl name_decl;
name_decl.name = name;
name_decl.type = NameDecl::NAMESPACE_DECL;
name_stack[at_level] = name_decl;
} else if (tk == TK_CURLY_BRACKET_OPEN) {
curly_stack++;
} else if (tk == TK_CURLY_BRACKET_CLOSE) {
curly_stack--;
if (name_stack.has(curly_stack)) {
if (name_stack[curly_stack].type != NameDecl::NAMESPACE_DECL)
type_curly_stack--;
name_stack.erase(curly_stack);
}
}
tk = get_token();
}
if (!error && tk == TK_EOF && curly_stack > 0) {
error_str = "Reached EOF with missing close curly brackets.";
error = true;
}
if (error)
return ERR_PARSE_ERROR;
return OK;
}
static String get_preprocessor_directive(const String &p_line, int p_from) {
CRASH_COND(p_line[p_from] != '#');
p_from++;
int i = p_from;
while (i < p_line.length() && (p_line[i] == '_' || (p_line[i] >= 'A' && p_line[i] <= 'Z') || (p_line[i] >= 'a' && p_line[i] <= 'z') || p_line[i] > 127)) {
i++;
}
return p_line.substr(p_from, i - p_from);
}
static void run_dummy_preprocessor(String &r_source, const String &p_filepath) {
Vector<String> lines = r_source.split("\n", /* p_allow_empty: */ true);
bool *include_lines = memnew_arr(bool, lines.size());
int if_level = -1;
Vector<bool> is_branch_being_compiled;
for (int i = 0; i < lines.size(); i++) {
const String &line = lines[i];
const int line_len = line.length();
int j;
for (j = 0; j < line_len; j++) {
if (line[j] != ' ' && line[j] != '\t') {
if (line[j] == '#') {
// First non-whitespace char of the line is '#'
include_lines[i] = false;
String directive = get_preprocessor_directive(line, j);
if (directive == "if") {
if_level++;
is_branch_being_compiled.push_back(if_level == 0 || is_branch_being_compiled[if_level - 1]);
} else if (directive == "elif") {
ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#elif' directive. File: '" + p_filepath + "'.");
is_branch_being_compiled.write[if_level] = false;
} else if (directive == "else") {
ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#else' directive. File: '" + p_filepath + "'.");
is_branch_being_compiled.write[if_level] = false;
} else if (directive == "endif") {
ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#endif' directive. File: '" + p_filepath + "'.");
is_branch_being_compiled.remove(if_level);
if_level--;
}
break;
} else {
// First non-whitespace char of the line is not '#'
include_lines[i] = if_level == -1 || is_branch_being_compiled[if_level];
break;
}
}
}
if (j == line_len) {
// Loop ended without finding a non-whitespace character.
// Either the line was empty or it only contained whitespaces.
include_lines[i] = if_level == -1 || is_branch_being_compiled[if_level];
}
}
r_source.clear();
// Custom join ignoring lines removed by the preprocessor
for (int i = 0; i < lines.size(); i++) {
if (i > 0 && include_lines[i - 1])
r_source += '\n';
if (include_lines[i]) {
r_source += lines[i];
}
}
}
Error ScriptClassParser::parse_file(const String &p_filepath) {
String source;
Error ferr = read_all_file_utf8(p_filepath, source);
ERR_FAIL_COND_V_MSG(ferr != OK, ferr,
ferr == ERR_INVALID_DATA
? "File '" + p_filepath + "' contains invalid unicode (UTF-8), so it was not loaded."
" Please ensure that scripts are saved in valid UTF-8 unicode."
: "Failed to read file: '" + p_filepath + "'.");
run_dummy_preprocessor(source, p_filepath);
return parse(source);
}
String ScriptClassParser::get_error() {
return error_str;
}
Vector<ScriptClassParser::ClassDecl> ScriptClassParser::get_classes() {
return classes;
}