From 5582be03b0285fb5d64f0696d735ff87f1600215 Mon Sep 17 00:00:00 2001 From: Firaas Ahmed Khan Date: Sun, 15 Dec 2024 20:04:12 +0530 Subject: [PATCH 01/10] Added static keyword --- crosstl/src/translator/lexer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crosstl/src/translator/lexer.py b/crosstl/src/translator/lexer.py index a4faa411..6a4aadb8 100644 --- a/crosstl/src/translator/lexer.py +++ b/crosstl/src/translator/lexer.py @@ -73,6 +73,7 @@ ("BITWISE_OR", r"\|"), ("BITWISE_XOR", r"\^"), ("BITWISE_NOT", r"~"), + ("STATIC", r"\bstatic\b") ] KEYWORDS = { @@ -85,6 +86,7 @@ "for": "FOR", "return": "RETURN", "const": "CONST", + "static": "STATIC", } From 06d95a6977aaa17676f579573a2175a3727c24a7 Mon Sep 17 00:00:00 2001 From: Firaas Ahmed Khan Date: Wed, 1 Jan 2025 18:16:58 +0530 Subject: [PATCH 02/10] Changes in DirectxLexer.py --- crosstl/src/backend/DirectX/DirectxLexer.py | 4 ++-- getting_start.ipynb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crosstl/src/backend/DirectX/DirectxLexer.py b/crosstl/src/backend/DirectX/DirectxLexer.py index b8a23b25..dd59a07f 100644 --- a/crosstl/src/backend/DirectX/DirectxLexer.py +++ b/crosstl/src/backend/DirectX/DirectxLexer.py @@ -19,8 +19,8 @@ ("ELSE_IF", r"\belse\sif\b"), ("ELSE", r"\belse\b"), ("FOR", r"\bfor\b"), - ("WHILE", r"\b\while\b"), - ("DO", r"\b\do\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), ("REGISTER", r"\bregister\b"), ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), ("NUMBER", r"\d+(\.\d+)?"), diff --git a/getting_start.ipynb b/getting_start.ipynb index 66a0fa8a..713fd025 100644 --- a/getting_start.ipynb +++ b/getting_start.ipynb @@ -736,7 +736,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.12.0" } }, "nbformat": 4, From a6e5f4146a65ab370f6735f70d78c0afed24e0f7 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 17:47:16 +0000 Subject: [PATCH 03/10] Merge from main directX Lexer --- crosstl/src/backend/DirectX/DirectxLexer.py | 203 ++++++++++++-------- 1 file changed, 119 insertions(+), 84 deletions(-) diff --git a/crosstl/src/backend/DirectX/DirectxLexer.py b/crosstl/src/backend/DirectX/DirectxLexer.py index dd59a07f..20e6eb8b 100644 --- a/crosstl/src/backend/DirectX/DirectxLexer.py +++ b/crosstl/src/backend/DirectX/DirectxLexer.py @@ -1,66 +1,11 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("STRUCT", r"\bstruct\b"), - ("CBUFFER", r"\bcbuffer\b"), - ("TEXTURE2D", r"\bTexture2D\b"), - ("SAMPLER_STATE", r"\bSamplerState\b"), - ("FVECTOR", r"\bfloat[2-4]\b"), - ("FLOAT", r"\bfloat\b"), - ("INT", r"\bint\b"), - ("UINT", r"\buint\b"), - ("BOOL", r"\bbool\b"), - ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE_IF", r"\belse\sif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("REGISTER", r"\bregister\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("COMMA", r","), - ("COLON", r":"), - ("QUESTION", r"\?"), - ("SHIFT_LEFT", r"<<"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("PLUS_EQUALS", r"\+="), - 
("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("ASSIGN_XOR", r"\^="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_AND", r"\&="), - ("BITWISE_XOR", r"\^"), - ("AND", r"&&"), - ("OR", r"\|\|"), - ("BITWISE_OR", r"\|"), - ("DOT", r"\."), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("EQUALS", r"="), - ("WHITESPACE", r"\s+"), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +# define keywords dictionary KEYWORDS = { "struct": "STRUCT", "cbuffer": "CBUFFER", @@ -70,6 +15,7 @@ "float2": "FVECTOR", "float3": "FVECTOR", "float4": "FVECTOR", + "double": "DOUBLE", "int": "INT", "uint": "UINT", "bool": "BOOL", @@ -81,38 +27,127 @@ "while": "WHILE", "do": "DO", "register": "REGISTER", + "switch": "SWITCH", + "case": "CASE", + "default": "DEFAULT", + "break": "BREAK", } +# use tuple for immutable token types that won't change +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("INCLUDE", r"\#include\b"), + ("STRUCT", r"\bstruct\b"), + ("CBUFFER", r"\bcbuffer\b"), + ("TEXTURE2D", r"\bTexture2D\b"), + ("SAMPLER_STATE", r"\bSamplerState\b"), + ("FVECTOR", r"\bfloat[2-4]\b"), + ("FLOAT", r"\bfloat\b"), + ("DOUBLE", r"\bdouble\b"), + ("INT", r"\bint\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE_IF", r"\belse\sif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("REGISTER", r"\bregister\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("COMMA", r","), + ("COLON", r":"), + ("QUESTION", r"\?"), + ("SHIFT_LEFT", r"<<"), + ("SHIFT_RIGHT", r">>"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("ASSIGN_XOR", r"\^="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_AND", r"\&="), + ("BITWISE_XOR", r"\^"), + ("LOGICAL_AND", r"&&"), + ("LOGICAL_OR", r"\|\|"), + ("BITWISE_OR", r"\|"), + ("DOT", r"\."), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("EQUALS", r"="), + ("WHITESPACE", r"\s+"), + ("STRING", r"\"[^\"]*\""), + ("SWITCH", r"\bswitch\b"), + ("CASE", r"\bcase\b"), + ("DEFAULT", r"\bdefault\b"), + ("BREAK", r"\bbreak\b"), + ("MOD", r"%"), + ] +) + class HLSLLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) - def tokenize(self): + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) + + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - 
"WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None - self.tokens.append(("EOF", "")) + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "HLSLLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) \ No newline at end of file From 5cf7875efec02fd3e970d70808c34eed75c7e82d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:51:08 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- crosstl/backend/Metal/MetalLexer.py | 2 +- crosstl/backend/Opengl/OpenglLexer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crosstl/backend/Metal/MetalLexer.py b/crosstl/backend/Metal/MetalLexer.py index a38becaa..8c0967dd 100644 --- a/crosstl/backend/Metal/MetalLexer.py +++ b/crosstl/backend/Metal/MetalLexer.py @@ -127,4 +127,4 @@ def tokenize(self): f"Illegal character '{self.code[pos]}' at position {pos}" ) - self.tokens.append(("EOF", "")) \ No newline at end of file + self.tokens.append(("EOF", "")) diff --git a/crosstl/backend/Opengl/OpenglLexer.py b/crosstl/backend/Opengl/OpenglLexer.py index e565e193..abf2f2d6 100644 --- a/crosstl/backend/Opengl/OpenglLexer.py +++ b/crosstl/backend/Opengl/OpenglLexer.py @@ -132,4 +132,4 @@ def tokenize(self): raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) - self.tokens.append(("EOF", "")) \ No newline at end of file + self.tokens.append(("EOF", "")) From c314791a48a05866c2ab041de6c4bbf6ff05470a Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 17:53:30 +0000 Subject: [PATCH 05/10] Add STATIC token and keyword to lexer --- crosstl/translator/lexer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crosstl/translator/lexer.py b/crosstl/translator/lexer.py index 21929417..e1ed07b9 100644 --- a/crosstl/translator/lexer.py +++ b/crosstl/translator/lexer.py @@ -76,6 +76,7 @@ ("BITWISE_OR", r"\|"), ("BITWISE_XOR", r"\^"), ("BITWISE_NOT", r"~"), + ("STATIC", r"\bstatic\b"), ] ) @@ -90,6 +91,7 @@ "for": "FOR", "return": "RETURN", "const": "CONST", + "static": "STATIC", } From bb955a6c6d571cef13a8d52f4d2e7140316914c9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:59:23 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- crosstl/backend/Opengl/OpenglLexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crosstl/backend/Opengl/OpenglLexer.py b/crosstl/backend/Opengl/OpenglLexer.py index a44b2b8d..dc652698 
100644 --- a/crosstl/backend/Opengl/OpenglLexer.py +++ b/crosstl/backend/Opengl/OpenglLexer.py @@ -151,4 +151,4 @@ def _next_token(self, pos: int) -> Tuple[int, str, str]: def from_file(cls, filepath: str, chunk_size: int = 8192) -> "GLSLLexer": # create a lexer instance from a file, reading in chunks with open(filepath, "r") as f: - return cls(f.read()) \ No newline at end of file + return cls(f.read()) From acedf0b48cf98ca37103645c5677d2f096ef4aac Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:04:27 +0000 Subject: [PATCH 07/10] Merge and fix opengl lexer --- crosstl/backend/Opengl/OpenglLexer.py | 289 ++++++++++++-------------- 1 file changed, 135 insertions(+), 154 deletions(-) diff --git a/crosstl/backend/Opengl/OpenglLexer.py b/crosstl/backend/Opengl/OpenglLexer.py index dc652698..5a1a18af 100644 --- a/crosstl/backend/Opengl/OpenglLexer.py +++ b/crosstl/backend/Opengl/OpenglLexer.py @@ -1,154 +1,135 @@ -import re -from typing import Iterator, Tuple, List - -# using sets for faster lookup -SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} - -TOKENS = tuple( - [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("VERSION", r"#version"), - ("PREPROCESSOR", r"#\w+"), - ("CONSTANT", r"\bconst\b"), - ("STRUCT", r"\bstruct\b"), - ("UNIFORM", r"\buniform\b"), - ("SAMPLER2D", r"\bsampler2D\b"), - ("SAMPLERCUBE", r"\bsamplerCube\b"), - ("BUFFER", r"\bbuffer\b"), - ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), - ("MATRIX", r"\bmat[234](x[234])?\b"), - ("FLOAT", r"\bfloat\b"), - ("INT", r"\bint\b"), - ("DOUBLE", r"\bdouble\b"), - ("UINT", r"\buint\b"), - ("BOOL", r"\bbool\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("IN", r"\bin\b"), - ("OUT", r"\bout\b"), - ("INOUT", r"\binout\b"), - ("LAYOUT", r"\blayout\b"), - ("ATTRIBUTE", r"\battribute\b"), - ("VARYING", r"\bvarying\b"), - ("CONST", r"\bconst\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("STRING", r'"[^"]*"'), - ("COMMA", r","), - ("COLON", r":"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("ASSIGN_AND", r"&="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_XOR", r"\^="), - ("LOGICAL_AND", r"&&"), - ("LOGICAL_OR", r"\|\|"), - ("ASSIGN_MOD", r"%="), - ("MOD", r"%"), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("DOT", r"\."), - ("EQUALS", r"="), - ("BITWISE_AND", r"&"), - ("BITWISE_OR", r"\|"), - ("BITWISE_XOR", r"\^"), - ("BITWISE_NOT", r"~"), - ("WHITESPACE", r"\s+"), - ] -) - -KEYWORDS = { - "struct": "STRUCT", - "uniform": "UNIFORM", - "sampler2D": "SAMPLER2D", - "samplerCube": "SAMPLERCUBE", - "float": "FLOAT", - "int": "INT", - "uint": "UINT", - "bool": "BOOL", - "void": "VOID", - "double": "DOUBLE", - "return": "RETURN", - "else if": "ELSE_IF", - "if": "IF", - "else": "ELSE", - "for": "FOR", - "while": "WHILE", - "do": "DO", - "in": "IN", - "out": "OUT", - "inout": "INOUT", - "layout": "LAYOUT", - "attribute": "ATTRIBUTE", - "varying": "VARYING", - "const": "CONST", -} - - -class GLSLLexer: - def 
__init__(self, code: str): - self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] - self.code = code - self._length = len(code) - - def tokenize(self) -> List[Tuple[str, str]]: - # tokenize the input code and return list of tokens - return list(self.token_generator()) - - def token_generator(self) -> Iterator[Tuple[str, str]]: - # function that yields tokens one at a time - pos = 0 - while pos < self._length: - token = self._next_token(pos) - if token is None: - raise SyntaxError( - f"Illegal character '{self.code[pos]}' at position {pos}" - ) - new_pos, token_type, text = token - - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - - if token_type not in SKIP_TOKENS: - yield (token_type, text) - - pos = new_pos - - yield ("EOF", "") - - def _next_token(self, pos: int) -> Tuple[int, str, str]: - # find the next token starting at the given position - for token_type, pattern in self._token_patterns: - match = pattern.match(self.code, pos) - if match: - return match.end(0), token_type, match.group(0) - return None - - @classmethod - def from_file(cls, filepath: str, chunk_size: int = 8192) -> "GLSLLexer": - # create a lexer instance from a file, reading in chunks - with open(filepath, "r") as f: - return cls(f.read()) +import re + +TOKENS = [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("VERSION", r"#version"), + ("PREPROCESSOR", r"#\w+"), + ("CONSTANT", r"\bconst\b"), + ("STRUCT", r"\bstruct\b"), + ("UNIFORM", r"\buniform\b"), + ("SAMPLER2D", r"\bsampler2D\b"), + ("SAMPLERCUBE", r"\bsamplerCube\b"), + ("BUFFER", r"\bbuffer\b"), + ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), + ("MATRIX", r"\bmat[234](x[234])?\b"), + ("FLOAT", r"\bfloat\b"), + ("INT", r"\bint\b"), + ("DOUBLE", r"\bdouble\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("IN", r"\bin\b"), + ("OUT", r"\bout\b"), + ("INOUT", r"\binout\b"), + ("LAYOUT", r"\blayout\b"), + ("ATTRIBUTE", r"\battribute\b"), + ("VARYING", r"\bvarying\b"), + ("CONST", r"\bconst\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("STRING", r'"[^"]*"'), + ("COMMA", r","), + ("COLON", r":"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("ASSIGN_AND", r"&="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_XOR", r"\^="), + ("LOGICAL_AND", r"&&"), + ("LOGICAL_OR", r"\|\|"), + ("ASSIGN_MOD", r"%="), + ("MOD", r"%"), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("DOT", r"\."), + ("EQUALS", r"="), + ("BITWISE_AND", r"&"), + ("BITWISE_OR", r"\|"), + ("BITWISE_XOR", r"\^"), + ("BITWISE_NOT", r"~"), + ("WHITESPACE", r"\s+"), +] + +KEYWORDS = { + "struct": "STRUCT", + "uniform": "UNIFORM", + "sampler2D": "SAMPLER2D", + "samplerCube": "SAMPLERCUBE", + "float": "FLOAT", + "int": "INT", + "uint": "UINT", + "bool": "BOOL", + "void": "VOID", + "double": "DOUBLE", + "return": "RETURN", + "else if": "ELSE_IF", + "if": "IF", + "else": "ELSE", + "for": "FOR", + "while": 
"WHILE", + "do": "DO", + "in": "IN", + "out": "OUT", + "inout": "INOUT", + "layout": "LAYOUT", + "attribute": "ATTRIBUTE", + "varying": "VARYING", + "const": "CONST", +} + + +class GLSLLexer: + def __init__(self, code): + self.code = code + self.tokens = [] + self.tokenize() + + def tokenize(self): + pos = 0 + while pos < len(self.code): + match = None + for token_type, pattern in TOKENS: + regex = re.compile(pattern) + match = regex.match(self.code, pos) + if match: + text = match.group(0) + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + if token_type not in [ + "WHITESPACE", + "COMMENT_SINGLE", + "COMMENT_MULTI", + ]: + token = (token_type, text) + self.tokens.append(token) + pos = match.end(0) + break + if not match: + raise SyntaxError( + f"Illegal character '{self.code[pos]}' at position {pos}" + ) + self.tokens.append(("EOF", "")) From 675440a9f7f43bb8895268cf487fa20d624eb4fc Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:06:17 +0000 Subject: [PATCH 08/10] Refactor OpenGL lexer for improved tokenization efficiency and readability --- crosstl/backend/Opengl/OpenglLexer.py | 213 ++++++++++++++------------ 1 file changed, 116 insertions(+), 97 deletions(-) diff --git a/crosstl/backend/Opengl/OpenglLexer.py b/crosstl/backend/Opengl/OpenglLexer.py index 5a1a18af..41f8f9f5 100644 --- a/crosstl/backend/Opengl/OpenglLexer.py +++ b/crosstl/backend/Opengl/OpenglLexer.py @@ -1,78 +1,84 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("VERSION", r"#version"), - ("PREPROCESSOR", r"#\w+"), - ("CONSTANT", r"\bconst\b"), - ("STRUCT", r"\bstruct\b"), - ("UNIFORM", r"\buniform\b"), - ("SAMPLER2D", r"\bsampler2D\b"), - ("SAMPLERCUBE", r"\bsamplerCube\b"), - ("BUFFER", r"\bbuffer\b"), - ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), - ("MATRIX", r"\bmat[234](x[234])?\b"), - ("FLOAT", r"\bfloat\b"), - ("INT", r"\bint\b"), - ("DOUBLE", r"\bdouble\b"), - ("UINT", r"\buint\b"), - ("BOOL", r"\bbool\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("IN", r"\bin\b"), - ("OUT", r"\bout\b"), - ("INOUT", r"\binout\b"), - ("LAYOUT", r"\blayout\b"), - ("ATTRIBUTE", r"\battribute\b"), - ("VARYING", r"\bvarying\b"), - ("CONST", r"\bconst\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("STRING", r'"[^"]*"'), - ("COMMA", r","), - ("COLON", r":"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("ASSIGN_AND", r"&="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_XOR", r"\^="), - ("LOGICAL_AND", r"&&"), - ("LOGICAL_OR", r"\|\|"), - ("ASSIGN_MOD", r"%="), - ("MOD", r"%"), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("DOT", r"\."), - ("EQUALS", r"="), - ("BITWISE_AND", r"&"), - ("BITWISE_OR", r"\|"), - ("BITWISE_XOR", r"\^"), - ("BITWISE_NOT", r"~"), - ("WHITESPACE", r"\s+"), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + 
("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("VERSION", r"#version"), + ("PREPROCESSOR", r"#\w+"), + ("CONSTANT", r"\bconst\b"), + ("STRUCT", r"\bstruct\b"), + ("UNIFORM", r"\buniform\b"), + ("SAMPLER2D", r"\bsampler2D\b"), + ("SAMPLERCUBE", r"\bsamplerCube\b"), + ("BUFFER", r"\bbuffer\b"), + ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), + ("MATRIX", r"\bmat[234](x[234])?\b"), + ("FLOAT", r"\bfloat\b"), + ("INT", r"\bint\b"), + ("DOUBLE", r"\bdouble\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("IN", r"\bin\b"), + ("OUT", r"\bout\b"), + ("INOUT", r"\binout\b"), + ("LAYOUT", r"\blayout\b"), + ("ATTRIBUTE", r"\battribute\b"), + ("VARYING", r"\bvarying\b"), + ("CONST", r"\bconst\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("STRING", r'"[^"]*"'), + ("COMMA", r","), + ("COLON", r":"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("ASSIGN_AND", r"&="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_XOR", r"\^="), + ("LOGICAL_AND", r"&&"), + ("LOGICAL_OR", r"\|\|"), + ("ASSIGN_MOD", r"%="), + ("MOD", r"%"), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("DOT", r"\."), + ("EQUALS", r"="), + ("BITWISE_AND", r"&"), + ("BITWISE_OR", r"\|"), + ("BITWISE_XOR", r"\^"), + ("BITWISE_NOT", r"~"), + ("WHITESPACE", r"\s+"), + ] +) KEYWORDS = { "struct": "STRUCT", @@ -103,33 +109,46 @@ class GLSLLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) + + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - def tokenize(self): + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) - self.tokens.append(("EOF", "")) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if 
match: + return match.end(0), token_type, match.group(0) + return None + + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "GLSLLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) From 74178d8192cdbf812c8763cbe4d008e46e8c2315 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:09:45 +0000 Subject: [PATCH 09/10] Update GitHub Actions workflow to use OWNER_TOKEN for merging PRs --- .github/workflows/PR_merge_main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index 1be76236..b3e8b0f9 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -26,9 +26,9 @@ jobs: # Merge the main branch into the PR branch git merge origin/main env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.OWNER_TOKEN }} - name: Push changes run: | # Push the updated PR branch back to the forked repository - git push https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} + git push https://x-access-token:${{ secrets.OWNER_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} From 5679c0354a5f34cb2e053b34373f26215f7d90bb Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:11:17 +0000 Subject: [PATCH 10/10] Revert "Update GitHub Actions workflow to use OWNER_TOKEN for merging PRs" This reverts commit 74178d8192cdbf812c8763cbe4d008e46e8c2315. --- .github/workflows/PR_merge_main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index b3e8b0f9..1be76236 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -26,9 +26,9 @@ jobs: # Merge the main branch into the PR branch git merge origin/main env: - GITHUB_TOKEN: ${{ secrets.OWNER_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Push changes run: | # Push the updated PR branch back to the forked repository - git push https://x-access-token:${{ secrets.OWNER_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} + git push https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }}
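
Usage sketch: the generator-based lexer API introduced for the DirectX backend in PATCH 03, on an illustrative shader string. The import path follows the crosstl/src layout shown in that patch's diff header; later patches in this series move the package to crosstl/backend, so adjust accordingly.

    from crosstl.src.backend.DirectX.DirectxLexer import HLSLLexer

    hlsl = "float4 main() : SV_Target { return float4(1.0, 0.0, 0.0, 1.0); }"
    lexer = HLSLLexer(hlsl)

    # tokenize() drains token_generator() into a list; whitespace and comments
    # are dropped via SKIP_TOKENS, and the stream terminates with ("EOF", "").
    tokens = lexer.tokenize()
    assert tokens[-1] == ("EOF", "")

    # token_generator() yields (token_type, text) pairs lazily; IDENTIFIER
    # matches such as "return" are remapped through the KEYWORDS dict.
    for token_type, text in lexer.token_generator():
        print(token_type, text)

Because the lexer stores only the source string and no cursor, token_generator() rescans from position 0 on each call, so the same instance can be iterated more than once.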
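
The OpenGL backend exposes the same interface after PATCH 08. One caveat visible in the diff: from_file() reads the whole file in a single f.read(), so the chunk_size parameter is accepted but not yet used. The file name below is illustrative.

    from crosstl.backend.Opengl.OpenglLexer import GLSLLexer

    # Classmethod constructor; reads the entire source up front despite the
    # chunk_size parameter in its signature.
    lexer = GLSLLexer.from_file("shader.frag")
    for token_type, text in lexer.token_generator():
        if token_type == "EOF":
            break
        print(token_type, text)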
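
A quick check of the static keyword wired into the translator lexer by PATCH 01 and PATCH 05. The Lexer class name and its tokens attribute are assumptions modeled on the pre-refactor backend lexers above; neither is shown in these diffs.

    from crosstl.translator.lexer import Lexer  # class name assumed

    # "static" matches IDENTIFIER first and is remapped via KEYWORDS, so the
    # trailing ("STATIC", r"\bstatic\b") TOKENS entry is likely redundant.
    tokens = Lexer("static int counter;").tokens  # .tokens attribute assumed
    assert ("STATIC", "static") in tokens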