gdnative_python/generation/generate_gdnative_api_struct.py


import re
from typing import List, Dict
import argparse
from pathlib import Path
from pycparser import CParser, c_ast
from autopxd import AutoPxd

# List the stdlib includes we are expecting. This is needed to hack around
# them: they are needed for pycparser, but they should end up in the pxd as
# `from libc.stdint cimport uint8_t` instead of being declared inside the
# `cdef extern` block that describes the whole header.
STDLIB_INCLUDES = {
"stdbool.h": ["bool"],
"stdint.h": [
"uint8_t",
"int8_t",
"uint16_t",
"int16_t",
"uint32_t",
"int32_t",
"uint64_t",
"int64_t",
"uint_least16_t",
"uint_least32_t"
],
"wchar.h": ["wchar_t", "size_t"],
"uchar.h": [
"char32_t",
"char16_t",
],
}
STDLIB_TYPES = {t for m in STDLIB_INCLUDES.values() for t in m}
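
# For instance, the stdint.h types above end up as a plain
# `from libc.stdint cimport uint8_t, int8_t, ...` line in the generated .pxd
# (see the template at the bottom of this file) instead of being re-declared
# inside the `cdef extern` block.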


class CCCP:
    """
    CCCP: the Cheap & Coarse C Preprocessor

    pycparser needs to be fed preprocessed C code, but we don't want to use a
    real preprocessor:
    - different OSes ship different preprocessors (msvc vs clang vs gcc)
    - we can control which #include to follow, given we don't care about the
      stdlib ones
    - we can easily tweak the behavior of #ifdef parts to ignore platform
      specificities

    In the end, remember that we are not compiling a C program but creating a
    .pxd file that will (in conjunction with a .pyx) be used to generate a .c
    file that will include the pandemonium api headers. So there is no need to
    handle platform-specific details (or even opaque structure sizes!) here:
    they will be ignored by cython and left to the final C compilation.
    """

    def __init__(
        self, include_dirs: List[str], forced_defined_vars: Dict[str, str], debug: bool = False
    ):
        self.source = []
        self.source_cursor = 0
        self.forced_defined_vars = forced_defined_vars.keys()
        self.defined_vars = {**forced_defined_vars}
        self.include_dirs = [Path(p) for p in include_dirs]
        self.ignored_includes = set()
        self.debug = debug

    @staticmethod
    def source_to_lines(src: str) -> List[str]:
        # First remove all comments
        src = re.sub(r"(//.*$)", "", src, flags=re.MULTILINE)
        src = re.sub(r"/\*.*?\*/", "", src, flags=re.DOTALL)
        # Split lines, taking care of backslashes
        lines = []
        multi_lines = ""
        for line in src.splitlines():
            line = line.rstrip()
            if line.endswith("\\"):
                multi_lines += line[:-1]
                continue
            lines.append(multi_lines + line)
            multi_lines = ""
        return lines
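
    # For instance (illustrative, not an exhaustive spec of the behavior):
    #
    #   CCCP.source_to_lines("int a; // comment\nint b = \\\n1;")
    #   == ["int a;", "int b = 1;"]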

    def debug_explain(self, msg):
        if self.debug:
            print(msg)

    def error_occurred(self, msg):
        extract = "\n".join(self.source[max(0, self.source_cursor - 5) : self.source_cursor + 5])
        raise RuntimeError(f"{msg}\n\nOccurred around:\n{extract}")

    def handle_include(self, line):
        match_include = re.match(r"^\s*#\s*include\s+[<\"]([a-zA-Z0-9_./]+)[>\"]$", line)
        if not match_include:
            return None
        include_name = match_include.group(1)
        if include_name in STDLIB_INCLUDES.keys():
            self.debug_explain(f"INCLUDE IGNORED {include_name}")
            self.source.pop(self.source_cursor)
            return 0
        for include_dir in self.include_dirs:
            include_path = include_dir / include_name
            try:
                included_source = include_path.read_text()
                # Remove the #include line and replace it by the included source
                self.source = (
                    self.source[: self.source_cursor]
                    + self.source_to_lines(included_source)
                    + self.source[self.source_cursor + 1 :]
                )
                self.debug_explain(f"INCLUDE {include_name}")
                return 0
            except FileNotFoundError:
                pass
        self.error_occurred(f"Cannot resolve include `{line}`")

    def handle_define(self, line):
        match_define = re.match(r"^\s*#\s*define\s+([a-zA-Z0-9_]+)(\s+|$)", line)
        if not match_define:
            return None
        define_name = match_define.group(1)
        define_value = line[len(match_define.group(0)) :]
        if define_name not in self.forced_defined_vars:
            self.defined_vars[define_name] = self.expand_macros(define_value)
            self.debug_explain(f"DEF {define_name}={define_value}")
        else:
            self.debug_explain(f"DEF IGNORED {define_name}={define_value}")
        self.source.pop(self.source_cursor)
        return 0

    def handle_define_macro(self, line):
        match_define_macro = re.match(r"^\s*#\s*define\s+([a-zA-Z0-9_]+)\(", line)
        if not match_define_macro:
            return None
        define_name = match_define_macro.group(1)
        define_value = line[len(match_define_macro.group(0)) :]
        # Macros are not supported; this is fine given they are never used
        # (though some are defined) in the gdnative headers.
        # As a sanity measure, we make sure that the code generated if a macro
        # ever does get used will cause the C parser to crash.
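        # For example, a hypothetical `#define MAX(a, b) ((a) > (b) ? (a) : (b))`
        # followed by a use of `MAX(...)` somewhere in the headers would expand
        # into a line containing `#error unsupported macro MAX`, which the C
        # parser then chokes on.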
        self.defined_vars[define_name] = f"#error unsupported macro {define_name}"
        self.debug_explain(f"DEF MACRO {define_name}=__UNSUPPORTED__")
        self.source.pop(self.source_cursor)
        return 0

    def handle_undef(self, line):
        match_undefine = re.match(r"^\s*#\s*undef\s+([a-zA-Z0-9_]+)$", line)
        if not match_undefine:
            return None
        define_name = match_undefine.group(1)
        if define_name not in self.forced_defined_vars:
            self.defined_vars.pop(define_name)
            self.debug_explain(f"UNDEF {define_name}")
        else:
            self.debug_explain(f"UNDEF IGNORED {define_name}")
        self.source.pop(self.source_cursor)
        return 0

    def handle_if(self, line):
        # Replace ifdef/ifndef by a generic if to simplify parsing
        line = re.sub(r"^\s*#\s*ifdef\s+([a-zA-Z0-9_]+)$", r"#if defined(\1)", line)
        line = re.sub(r"^\s*#\s*ifndef\s+([a-zA-Z0-9_]+)$", r"#if !defined(\1)", line)
        match_if = re.match(r"^\s*#\s*if\s+", line)
        if not match_if:
            return None

        def _eval_if_condition(condition):
            # Turn the condition into Python code and eval it \o/
            expr = condition.replace("||", " or ")
            expr = expr.replace("&&", " and ")
            expr = expr.replace("!", " not ")
            expr = re.sub(r"defined\(([a-zA-Z0-9_]+)\)", r"defined('\1')", expr)
            try:
                return eval(
                    expr, {"defined": lambda key: key in self.defined_vars}, self.defined_vars
                )
            except Exception as exc:
                self.error_occurred(
                    f"Error {exc} while evaluating `{expr}` (generated from `{condition}`)"
                )

        def _keep_until_next_condition(offset):
            nested_count = 0
            kept_body = []
            while True:
                try:
                    line = self.source[self.source_cursor + offset]
                except IndexError:
                    self.error_occurred("Reached end of file without #endif")
                if re.match(r"^\s*#\s*(if|ifdef|ifndef)(\s+|$)", line):
                    # Nested #if
                    nested_count += 1
                else_match = re.match(r"^\s*#\s*(else$|elif\s+)", line)
                if else_match:
                    if nested_count == 0:
                        condition_type = else_match.group(1).strip()
                        condition = line[len(else_match.group(1)) :]
                        return kept_body, condition_type, condition, offset + 1
                if re.match(r"^\s*#\s*endif$", line):
                    if nested_count == 0:
                        return kept_body, "endif", "", offset + 1
                    else:
                        nested_count -= 1
                offset += 1
                kept_body.append(line)

        def _retrieve_kept_body(condition, offset):
            if _eval_if_condition(condition):
                kept_body, condition_type, condition, offset = _keep_until_next_condition(offset)
                # Skip the other else/elif body parts until the matching endif
                while condition_type != "endif":
                    _, condition_type, _, offset = _keep_until_next_condition(offset)
                return kept_body, offset
            else:
                # Ignore this if body part
                _, condition_type, condition, offset = _keep_until_next_condition(offset)
                if condition_type == "elif":
                    return _retrieve_kept_body(condition, offset)
                elif condition_type == "else":
                    return _retrieve_kept_body("True", offset)
                else:  # endif
                    return [], offset

        if_condition = line[len(match_if.group()) :]
        body, offset = _retrieve_kept_body(if_condition, offset=1)
        if_starts = self.source_cursor
        if_ends = self.source_cursor + offset
        self.source[if_starts:if_ends] = body
        self.debug_explain(f"IF ({line}) ==> {if_starts} {if_ends}")
        return 0  # 0 is not equivalent to None!
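
    # An illustration of what the #if handling above does (hypothetical input):
    # with `_WIN32` not defined, the lines
    #
    #   #ifdef _WIN32
    #   #define GDCALLINGCONV __stdcall
    #   #else
    #   #define GDCALLINGCONV
    #   #endif
    #
    # collapse to the single `#define GDCALLINGCONV` line of the #else branch,
    # which handle_define then picks up on the next pass.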

    def handle_unknown(self, line):
        match_unknown = re.match(r"^\s*#", line)
        if not match_unknown:
            return None
        self.error_occurred(f"Unknown preprocessor command `{line}`")

    def expand_macros(self, line):
        # Cheap optimization: discard most lines upfront, given the regex
        # substitutions below are CPU heavy
        if not line or all(key not in line for key in self.defined_vars.keys()):
            return line
        expanded_line = line
        # Expand repeatedly, given a macro can reference another one
        while True:
            for key, value in self.defined_vars.items():
                expanded_line = re.sub(
                    f"(^|[^a-zA-Z0-9_]){key}([^a-zA-Z0-9_]|$)",
                    f"\\g<1>{value}\\g<2>",
                    expanded_line,
                )
            if expanded_line == line:
                break
            line = expanded_line
        return line
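
    # For instance, with defined_vars == {"GDAPI": "", "SIZE": "(4 * 2)"}, the
    # expansion above gives (note the doubled space left by the empty GDAPI):
    #
    #   expand_macros("void GDAPI f(char buf[SIZE]);")
    #   == "void  f(char buf[(4 * 2)]);"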

    def parse(self, src: str) -> str:
        self.source = self.source_to_lines(src)
        cpp_handlers = (
            self.handle_define,
            self.handle_define_macro,
            self.handle_if,
            self.handle_include,
            self.handle_undef,
            self.handle_unknown,
        )
        while True:
            try:
                source_line = self.source[self.source_cursor]
            except IndexError:
                # Parsing is done
                break
            for cpp_handler in cpp_handlers:
                eaten_lines = cpp_handler(source_line)
                if eaten_lines is not None:
                    self.source_cursor += eaten_lines
                    break
            else:
                # Not a preprocessor line
                self.source[self.source_cursor] = self.expand_macros(source_line)
                self.source_cursor += 1
        return "\n".join(self.source)


class PatchedAutoPxd(AutoPxd):
    def visit_TypeDecl(self, node):
        # Ignore types from the stdlib (they will be provided by the
        # `from libc.stdint cimport uint8_t` syntax)
        if node.declname in STDLIB_TYPES:
            return
        else:
            return super().visit_TypeDecl(node)

    def visit_ArrayDecl(self, node):
        # autopxd doesn't support arrays with an expression as size, but in:
        #   typedef struct {uint8_t _dont_touch_that[PANDEMONIUM_VECTOR3_SIZE];} pandemonium_vector3;
        # `PANDEMONIUM_VECTOR3_SIZE` gets resolved as `sizeof(void*)` :(
        if node.type.declname == "_dont_touch_that":
            # Of course the 0 size is wrong, but it's not an issue given
            # we don't touch this array in Cython code (hence the name ^^)
            node.dim = c_ast.Constant(type="int", value="0")
        return super().visit_ArrayDecl(node)
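
    # As an illustration (a sketch of what autopxd emits, not verified output),
    # the pandemonium_vector3 struct above ends up declared roughly as:
    #
    #   ctypedef struct pandemonium_vector3:
    #       uint8_t _dont_touch_that[0]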


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert gdnative_api_struct.gen.h into Cython .pxd"
    )
    parser.add_argument(
        "--input",
        "-i",
        required=True,
        metavar="PANDEMONIUM_HEADERS_PATH",
        help="Path to GDNative headers",
    )
    parser.add_argument(
        "--output",
        "-o",
        required=True,
        type=argparse.FileType("w", encoding="utf8"),
        metavar="GDNATIVE_API_STRUCT_PXD",
        help="Path to store the generated gdnative_api_struct.pxd file",
    )
    args = parser.parse_args()
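
    # Typical invocation (illustrative paths):
    #
    #   python generate_gdnative_api_struct.py -i <pandemonium_headers_dir> -o gdnative_api_struct.pxd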

    # Step 1: preprocessing
    header_name = "gdnative_api_struct.gen.h"
    with open(f"{args.input}/{header_name}", "r") as fd:
        source = fd.read()
    # Salute, comrade!
    cccp = CCCP(
        include_dirs=[args.input],
        forced_defined_vars={"GDAPI": "", "GDN_EXPORT": "", "GDCALLINGCONV": ""},
    )
    preprocessed = ""
    # pycparser requires that every symbol be defined, hence we provide a dummy
    # definition for the needed stdlib types.
    # Note those definitions will then be detected and ignored by PatchedAutoPxd.
    for stdtype in STDLIB_TYPES:
        preprocessed += f"typedef int {stdtype};\n"
    preprocessed += cccp.parse(source)
    with open("output.preprocessed.c", "w") as fd:
        fd.write(preprocessed)

    # Step 2: C parsing
    parser = CParser()
    ast = parser.parse(preprocessed)

    # Step 3: .pxd generation
    p = PatchedAutoPxd(header_name)
    p.visit(ast)
    pxd_cdef = p.lines()
    # Drop the `cdef extern from ...` first line given we want to re-create it
    # with the `nogil` option, and we also want to add the
    # `pandemonium_method_flags` inline C code
    assert pxd_cdef[0].startswith("cdef extern from")
    pxd_cdef_body = "\n".join(pxd_cdef[1:])
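    # (the dropped first line looks something like
    # `cdef extern from "gdnative_api_struct.gen.h":`, hence the assert above)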
pxd = f"""\
# /!\\ Autogenerated code, modifications will be lost /!\\
# see `generation/generate_gdnative_api_struct.py`
from libc.stddef cimport wchar_t, size_t
from libc.stdint cimport {', '.join(STDLIB_INCLUDES['stdint.h'])}
ctypedef uint_least16_t char16_t
ctypedef uint_least32_t char32_t
cdef extern from "{header_name}" nogil:
\"\"\"
typedef enum {{
PANDEMONIUM_METHOD_FLAG_NORMAL = 1,
PANDEMONIUM_METHOD_FLAG_EDITOR = 2,
PANDEMONIUM_METHOD_FLAG_NOSCRIPT = 4,
PANDEMONIUM_METHOD_FLAG_CONST = 8,
PANDEMONIUM_METHOD_FLAG_REVERSE = 16,
PANDEMONIUM_METHOD_FLAG_VIRTUAL = 32,
PANDEMONIUM_METHOD_FLAG_FROM_SCRIPT = 64,
PANDEMONIUM_METHOD_FLAG_VARARG = 128,
PANDEMONIUM_METHOD_FLAGS_DEFAULT = PANDEMONIUM_METHOD_FLAG_NORMAL
}} pandemonium_method_flags;
\"\"\"
ctypedef enum pandemonium_method_flags:
PANDEMONIUM_METHOD_FLAG_NORMAL = 1
PANDEMONIUM_METHOD_FLAG_EDITOR = 2
PANDEMONIUM_METHOD_FLAG_NOSCRIPT = 4
PANDEMONIUM_METHOD_FLAG_CONST = 8
PANDEMONIUM_METHOD_FLAG_REVERSE = 16 # used for events
PANDEMONIUM_METHOD_FLAG_VIRTUAL = 32
PANDEMONIUM_METHOD_FLAG_FROM_SCRIPT = 64
PANDEMONIUM_METHOD_FLAG_VARARG = 128
PANDEMONIUM_METHOD_FLAGS_DEFAULT = 1 # METHOD_FLAG_NORMAL
ctypedef bint bool
{pxd_cdef_body}
"""
args.output.write(pxd)