import re
import argparse
from pathlib import Path
from typing import Dict, List

from pycparser import CParser, c_ast
from autopxd import AutoPxd

# List the includes to the stdlib we are expecting. This is needed to hack
# around them given they are needed for pycparser, but should end up in the pxd
# as `from libc.stdint cimport uint8_t` instead of being inside the `cdef extern`
# describing the whole header stuff.
STDLIB_INCLUDES = {
    "stdbool.h": ["bool"],
    "stdint.h": [
        "uint8_t",
        "int8_t",
        "uint16_t",
        "int16_t",
        "uint32_t",
        "int32_t",
        "uint64_t",
        "int64_t",
        "uint_least16_t",
        "uint_least32_t",
    ],
    "wchar.h": ["wchar_t", "size_t"],
    "uchar.h": [
        "char32_t",
        "char16_t",
    ],
}
# Flat set of every stdlib type name, for fast membership tests.
STDLIB_TYPES = {t for m in STDLIB_INCLUDES.values() for t in m}


class CCCP:
    """
    CCCP: the Cheap&Coarse C Preprocessor

    PyCParser needs to get passed preprocessed C code, but we don't want to use
    a real one:
    - different OS have different preprocessors (msvc vs clang vs gcc)
    - we can control which #include to follow given we don't care about stdlib ones
    - we can easily tweak the behavior of #ifdef parts to ignore platform
      specificities

    In the end remember that we are not compiling a C program, but creating a
    .pxd file that will (in conjunction with a .pyx) be used to generate a .c
    file that will include the pandemonium api headers.
    So there is no need to handle platform specific (or even opaque structure
    size !) detail here: they will be ignored by cython and left to the final
    C compilation.

    Preprocessor handlers (`handle_*`) share a common contract: they return
    ``None`` when the line is not theirs, or the number of lines "eaten"
    (i.e. how much to advance the cursor) after mutating ``self.source``.
    """

    def __init__(
        self,
        include_dirs: List[str],
        forced_defined_vars: Dict[str, str],
        debug: bool = False,
    ):
        self.source: List[str] = []
        self.source_cursor = 0
        # Materialize as a set so later mutations of the caller's dict cannot
        # change which defines are considered "forced" (a `.keys()` view would).
        self.forced_defined_vars = set(forced_defined_vars)
        self.defined_vars = {**forced_defined_vars}
        self.include_dirs = [Path(p) for p in include_dirs]
        # NOTE(review): misspelled and never read anywhere in this file; kept
        # (with its original name) in case external code pokes at it.
        self.ingnored_includes = set()
        self.debug = debug

    @staticmethod
    def source_to_lines(src: str) -> List[str]:
        """Strip comments and split into logical lines (joining backslash
        continuations into a single line)."""
        # First remove all comments
        src = re.sub(r"(//.*$)", "", src, flags=re.MULTILINE)
        src = re.sub(r"/\*.*?\*/", "", src, flags=re.DOTALL)
        # Split lines, taking care of backslashes
        lines = []
        multi_lines = ""
        for line in src.splitlines():
            line = line.rstrip()
            if line.endswith("\\"):
                multi_lines += line[:-1]
                continue
            lines.append(multi_lines + line)
            multi_lines = ""
        return lines

    def debug_explain(self, msg):
        """Print `msg` only when debug mode is enabled."""
        if self.debug:
            print(msg)

    def error_occurred(self, msg):
        """Raise a RuntimeError enriched with the source lines around the
        current cursor position, to ease troubleshooting."""
        extract = "\n".join(self.source[max(0, self.source_cursor - 5) : self.source_cursor + 5])
        raise RuntimeError(f"{msg}\n\nOccurred around:\n{extract}")

    def handle_include(self, line):
        """Inline the content of `#include "..."` / `#include <...>` lines,
        except for known stdlib headers which are simply dropped."""
        match_include = re.match(r"^\s*#\s*include\s+[<\"]([a-zA-Z0-9_./]+)[>\"]$", line)
        if not match_include:
            return None
        include_name = match_include.group(1)
        if include_name in STDLIB_INCLUDES:
            # Stdlib types are provided separately (see PatchedAutoPxd), so
            # their headers must not leak into the `cdef extern` block.
            self.debug_explain(f"INCLUDE IGNORED {include_name}")
            self.source.pop(self.source_cursor)
            return 0
        for include_dir in self.include_dirs:
            include_path = include_dir / include_name
            try:
                included_source = include_path.read_text()
            except FileNotFoundError:
                continue
            # Remove #include line and replace it by included source
            self.source = (
                self.source[: self.source_cursor]
                + self.source_to_lines(included_source)
                + self.source[self.source_cursor + 1 :]
            )
            self.debug_explain(f"INCLUDE {include_name}")
            return 0
        self.error_occurred(f"Cannot resolve import `{line}`")

    def handle_define(self, line):
        """Register a simple (non-function-like) `#define NAME [VALUE]`.

        Defines listed in `forced_defined_vars` are never overwritten.
        """
        match_define = re.match(r"^\s*#\s*define\s+([a-zA-Z0-9_]+)(\s+|$)", line)
        if not match_define:
            return None
        define_name = match_define.group(1)
        define_value = line[len(match_define.group(0)) :]
        if define_name not in self.forced_defined_vars:
            # Values are expanded at definition time (unlike a real cpp, which
            # expands at use time); good enough for the headers we target.
            self.defined_vars[define_name] = self.expand_macros(define_value)
            self.debug_explain(f"DEF {define_name}={define_value}")
        else:
            self.debug_explain(f"DEF IGNORED {define_name}={define_value}")
        self.source.pop(self.source_cursor)
        return 0

    def handle_define_macro(self, line):
        """Register a function-like macro `#define NAME(...)` as a poison value.

        Macros are not supported, this is ok given they are not used
        (but some are defined) in the gdnative headers.
        As a sanity measure, we make sure the code generated if the macro
        is used will cause the C parser to crash.
        """
        match_define_macro = re.match(r"^\s*#\s*define\s+([a-zA-Z0-9_]+)\(", line)
        if not match_define_macro:
            return None
        define_name = match_define_macro.group(1)
        self.defined_vars[define_name] = f"#error unsupported macro {define_name}"
        self.debug_explain(f"DEF MACRO {define_name}=__UNSUPPORTED__")
        self.source.pop(self.source_cursor)
        return 0

    def handle_undef(self, line):
        """Handle `#undef NAME` (ignored for forced defines)."""
        match_undefine = re.match(r"^\s*#\s*undef\s+([a-zA-Z0-9_]+)$", line)
        if not match_undefine:
            return None
        define_name = match_undefine.group(1)
        if define_name not in self.forced_defined_vars:
            # C allows #undef of a name that was never defined: don't crash.
            self.defined_vars.pop(define_name, None)
            self.debug_explain(f"UNDEF {define_name}")
        else:
            self.debug_explain(f"UNDEF IGNORED {define_name}")
        self.source.pop(self.source_cursor)
        return 0

    def handle_if(self, line):
        """Resolve an `#if`/`#ifdef`/`#ifndef` block: evaluate each condition
        and replace the whole construct (up to its matching `#endif`) by the
        body of the first branch that holds."""
        # Replace ifdef/ifndef by generic if to simplify parsing
        line = re.sub(r"^\s*#\s*ifdef\s+([a-zA-Z0-9_]+)$", r"#if defined(\1)", line)
        line = re.sub(r"^\s*#\s*ifndef\s+([a-zA-Z0-9_]+)$", r"#if !defined(\1)", line)
        match_if = re.match(r"^\s*#\s*if\s+", line)
        if not match_if:
            return None

        def _eval_if_condition(condition):
            # Turn condition into Python code and eval it \o/
            # NOTE(review): eval on header content is only acceptable because
            # the input headers are trusted build inputs, not user data.
            expr = condition.replace("||", " or ")
            expr = expr.replace("&&", " and ")
            # `(?!=)` keeps `!=` intact (a bare .replace would mangle it).
            expr = re.sub(r"!(?!=)", " not ", expr)
            expr = re.sub(r"defined\(([a-zA-Z0-9_]+)\)", r"defined('\1')", expr)
            try:
                return eval(
                    expr,
                    {"defined": lambda key: key in self.defined_vars},
                    self.defined_vars,
                )
            except Exception as exc:
                self.error_occurred(
                    f"Error {exc} while evaluating `{expr}` (generated from `{condition}`)"
                )

        def _keep_until_next_condition(offset):
            """Collect lines until the next same-level #elif/#else/#endif.

            Returns (kept_body, condition_type, condition, next_offset).
            Nested #if...#endif constructs are kept verbatim in the body (they
            get reprocessed once the cursor reaches them).
            """
            nested_count = 0
            kept_body = []
            while True:
                try:
                    line = self.source[self.source_cursor + offset]
                except IndexError:
                    self.error_occurred("Reached end of file without #endif")
                if re.match(r"^\s*#\s*(if|ifdef|ifndef)(\s+|$)", line):
                    # Nested #if
                    nested_count += 1
                else_match = re.match(r"^\s*#\s*(else$|elif\s+)", line)
                if else_match:
                    if nested_count == 0:
                        condition_type = else_match.group(1).strip()
                        # Slice past the WHOLE match (`#elif ` with any
                        # surrounding whitespace), not just group 1.
                        condition = line[len(else_match.group(0)) :]
                        return kept_body, condition_type, condition, offset + 1
                if re.match(r"^\s*#\s*endif$", line):
                    if nested_count == 0:
                        return kept_body, "endif", "", offset + 1
                    else:
                        nested_count -= 1
                offset += 1
                kept_body.append(line)

        def _retrieve_kept_body(condition, offset):
            """Return (body_to_keep, total_offset) for the branch that wins."""
            if _eval_if_condition(condition):
                kept_body, condition_type, condition, offset = _keep_until_next_condition(offset)
                # Skip other else/elif body parts until the matching endif
                while condition_type != "endif":
                    _, condition_type, _, offset = _keep_until_next_condition(offset)
                return kept_body, offset
            else:
                # Ignore the if body part
                _, condition_type, condition, offset = _keep_until_next_condition(offset)
                if condition_type == "elif":
                    return _retrieve_kept_body(condition, offset)
                elif condition_type == "else":
                    return _retrieve_kept_body("True", offset)
                else:  # endif
                    return [], offset

        if_condition = line[len(match_if.group()) :]
        body, offset = _retrieve_kept_body(if_condition, offset=1)
        if_starts = self.source_cursor
        if_ends = self.source_cursor + offset
        self.source[if_starts:if_ends] = body
        self.debug_explain(f"IF ({line}) ==> {if_starts} {if_ends}")
        return 0  # 0 is not equivalent to None !

    def handle_unknown(self, line):
        """Catch-all: any remaining `#...` directive is a hard error."""
        match_unknown = re.match(r"^\s*#", line)
        if not match_unknown:
            return None
        self.error_occurred(f"Unknown preprocessor command `{line}`")

    def expand_macros(self, line):
        """Substitute every known define in `line`, repeatedly, until the line
        stops changing (a macro value can reference another macro)."""
        # Simple optim to discard most of the lines given regex search is cpu heavy
        if not line or all(key not in line for key in self.defined_vars):
            return line
        expanded_line = line
        # Recursive expansion given a macro can reference another one
        while True:
            for key, value in self.defined_vars.items():
                expanded_line = re.sub(
                    f"(^|[^a-zA-Z0-9_]){key}([^a-zA-Z0-9_]|$)",
                    f"\\g<1>{value}\\g<2>",
                    expanded_line,
                )
            if expanded_line == line:
                break
            line = expanded_line
        return line

    def parse(self, src: str) -> str:
        """Preprocess `src` and return the resulting C source as a string."""
        self.source = self.source_to_lines(src)
        # Reset the cursor so the same instance can parse more than once.
        self.source_cursor = 0
        cpp_handlers = (
            self.handle_define,
            self.handle_define_macro,
            self.handle_if,
            self.handle_include,
            self.handle_undef,
            self.handle_unknown,
        )
        while True:
            try:
                source_line = self.source[self.source_cursor]
            except IndexError:
                # Parsing is done
                break
            for cpp_handler in cpp_handlers:
                eaten_lines = cpp_handler(source_line)
                if eaten_lines is not None:
                    self.source_cursor += eaten_lines
                    break
            else:
                # Not a preprocessor line
                self.source[self.source_cursor] = self.expand_macros(source_line)
                self.source_cursor += 1
        return "\n".join(self.source)


class PatchedAutoPxd(AutoPxd):
    def visit_TypeDecl(self, node):
        # Ignore types from stdlib (will be provided by the
        # `from libc.stdint cimport uint8_t` syntax)
        if node.declname in STDLIB_TYPES:
            return
        else:
            return super().visit_TypeDecl(node)

    def visit_ArrayDecl(self, node):
        # autopxd doesn't support array with an expression as size, but in:
        #   typedef struct {uint8_t _dont_touch_that[PANDEMONIUM_VECTOR3_SIZE];} pandemonium_vector3;
        # `PANDEMONIUM_VECTOR3_SIZE` gets resolved as `sizeof(void*)` :(
        # getattr: for nested ArrayDecl (multi-dim arrays) `node.type` is
        # another ArrayDecl and has no `declname` attribute.
        if getattr(node.type, "declname", None) == "_dont_touch_that":
            # Of course the 0 size is wrong, but it's not an issue given
            # we don't touch this array in Cython code (hence the name ^^)
            node.dim = c_ast.Constant(type="int", value="0")
        return super().visit_ArrayDecl(node)


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        description="Convert gdnative_api_struct.gen.h into Cython .pxd"
    )
    arg_parser.add_argument(
        "--input",
        "-i",
        required=True,
        metavar="PANDEMONIUM_HEADERS_PATH",
        help="Path to GDNative headers",
    )
    arg_parser.add_argument(
        "--output",
        "-o",
        required=True,
        type=argparse.FileType("w", encoding="utf8"),
        metavar="GDNATIVE_API_STRUCT_PXD",
        help="Path to store the generated gdnative_api_struct.pxd file",
    )
    args = arg_parser.parse_args()

    # Step 1: preprocessing
    header_name = "gdnative_api_struct.gen.h"
    source = (Path(args.input) / header_name).read_text()
    # салют товарищ !
    cccp = CCCP(
        include_dirs=[args.input],
        forced_defined_vars={"GDAPI": "", "GDN_EXPORT": "", "GDCALLINGCONV": ""},
    )
    preprocessed = ""
    # pycparser requires each symbol must be defined, hence provide a dummy
    # definition of the needed stdlib types.
    # Note those definitions will then be detected and ignored by PatchedAutoPxd.
    for stdtype in STDLIB_TYPES:
        preprocessed += f"typedef int {stdtype};\n"
    preprocessed += cccp.parse(source)
    # Debug artifact: the exact C source handed over to pycparser.
    with open("output.preprocessed.c", "w") as fd:
        fd.write(preprocessed)

    # Step 2: C parsing
    c_parser = CParser()
    ast = c_parser.parse(preprocessed)

    # Step 3: .pxd generation
    p = PatchedAutoPxd(header_name)
    p.visit(ast)
    pxd_cdef = p.lines()
    # Remove the cdef part given we want to add the `nogil` option and
    # we also want to add the `pandemonium_method_flags` C inline code
    assert pxd_cdef[0].startswith("cdef extern from")
    pxd_cdef_body = "\n".join(pxd_cdef[1:])

    pxd = f"""\
# /!\\ Autogenerated code, modifications will be lost /!\\
# see `generation/generate_gdnative_api_struct.py`

from libc.stddef cimport wchar_t, size_t
from libc.stdint cimport {', '.join(STDLIB_INCLUDES['stdint.h'])}
ctypedef uint_least16_t char16_t
ctypedef uint_least32_t char32_t

cdef extern from "{header_name}" nogil:
    \"\"\"
    typedef enum {{
        PANDEMONIUM_METHOD_FLAG_NORMAL = 1,
        PANDEMONIUM_METHOD_FLAG_EDITOR = 2,
        PANDEMONIUM_METHOD_FLAG_NOSCRIPT = 4,
        PANDEMONIUM_METHOD_FLAG_CONST = 8,
        PANDEMONIUM_METHOD_FLAG_REVERSE = 16,
        PANDEMONIUM_METHOD_FLAG_VIRTUAL = 32,
        PANDEMONIUM_METHOD_FLAG_FROM_SCRIPT = 64,
        PANDEMONIUM_METHOD_FLAG_VARARG = 128,
        PANDEMONIUM_METHOD_FLAGS_DEFAULT = PANDEMONIUM_METHOD_FLAG_NORMAL
    }} pandemonium_method_flags;
    \"\"\"
    ctypedef enum pandemonium_method_flags:
        PANDEMONIUM_METHOD_FLAG_NORMAL = 1
        PANDEMONIUM_METHOD_FLAG_EDITOR = 2
        PANDEMONIUM_METHOD_FLAG_NOSCRIPT = 4
        PANDEMONIUM_METHOD_FLAG_CONST = 8
        PANDEMONIUM_METHOD_FLAG_REVERSE = 16  # used for events
        PANDEMONIUM_METHOD_FLAG_VIRTUAL = 32
        PANDEMONIUM_METHOD_FLAG_FROM_SCRIPT = 64
        PANDEMONIUM_METHOD_FLAG_VARARG = 128
        PANDEMONIUM_METHOD_FLAGS_DEFAULT = 1  # METHOD_FLAG_NORMAL

    ctypedef bint bool

{pxd_cdef_body}
"""
    args.output.write(pxd)