From aab35ef69c29d88998e3852ab686f448cff64dc5 Mon Sep 17 00:00:00 2001 From: dheeraxspidey <152774274+dheeraxspidey@users.noreply.github.com> Date: Fri, 3 Jan 2025 20:42:39 +0530 Subject: [PATCH 01/13] Add modulus operator support with tests (#248) * Add modulus operator support with tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * feat: implement modulus operator in backend parsers * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- crosstl/backend/DirectX/DirectxLexer.py | 1 + crosstl/backend/DirectX/DirectxParser.py | 2 +- crosstl/backend/Metal/MetalLexer.py | 2 ++ crosstl/backend/Metal/MetalParser.py | 2 +- crosstl/backend/Mojo/MojoLexer.py | 1 + crosstl/backend/Mojo/MojoParser.py | 2 +- crosstl/backend/Opengl/OpenglParser.py | 2 +- crosstl/backend/Vulkan/VulkanLexer.py | 2 +- crosstl/backend/Vulkan/VulkanParser.py | 6 ++-- crosstl/backend/slang/SlangLexer.py | 2 ++ crosstl/backend/slang/SlangParser.py | 2 +- crosstl/translator/parser.py | 3 +- tests/test_backend/test_directx/test_lexer.py | 16 +++++++++ .../test_backend/test_directx/test_parser.py | 13 +++++++ tests/test_backend/test_metal/test_lexer.py | 16 +++++++++ tests/test_backend/test_metal/test_parser.py | 14 ++++++++ tests/test_backend/test_mojo/test_lexer.py | 17 +++++++-- tests/test_backend/test_mojo/test_parser.py | 13 +++++-- tests/test_backend/test_opengl/test_lexer.py | 16 +++++++++ tests/test_backend/test_opengl/test_parser.py | 13 +++++++ tests/test_backend/test_slang/test_lexer.py | 16 +++++++++ tests/test_backend/test_slang/test_parser.py | 16 +++++++++ tests/test_backend/test_vulkan/test_lexer.py | 17 +++++++-- tests/test_backend/test_vulkan/test_parser.py | 15 ++++++-- tests/test_translator/test_parser.py | 35 +++++++++++++++++++ 25 files changed, 222 insertions(+), 22 deletions(-) diff --git a/crosstl/backend/DirectX/DirectxLexer.py b/crosstl/backend/DirectX/DirectxLexer.py index b6003c1d..367549e8 100644 --- a/crosstl/backend/DirectX/DirectxLexer.py +++ b/crosstl/backend/DirectX/DirectxLexer.py @@ -67,6 +67,7 @@ ("CASE", r"\bcase\b"), ("DEFAULT", r"\bdefault\b"), ("BREAK", r"\bbreak\b"), + ("MOD", r"%"), ] KEYWORDS = { diff --git a/crosstl/backend/DirectX/DirectxParser.py b/crosstl/backend/DirectX/DirectxParser.py index e7c03095..5c5ff80b 100644 --- a/crosstl/backend/DirectX/DirectxParser.py +++ b/crosstl/backend/DirectX/DirectxParser.py @@ -498,7 +498,7 @@ def parse_additive(self): def parse_multiplicative(self): left = self.parse_unary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[1] self.eat(self.current_token[0]) right = self.parse_unary() diff --git a/crosstl/backend/Metal/MetalLexer.py b/crosstl/backend/Metal/MetalLexer.py index d6701235..8c0967dd 100644 --- a/crosstl/backend/Metal/MetalLexer.py +++ b/crosstl/backend/Metal/MetalLexer.py @@ -64,6 +64,8 @@ ("EQUALS", r"="), ("bitwise_and", r"&"), ("WHITESPACE", r"\s+"), + ("MOD", r"%"), + ("ASSIGN_MOD", r"%="), ] KEYWORDS = { diff --git a/crosstl/backend/Metal/MetalParser.py b/crosstl/backend/Metal/MetalParser.py index f72a7bf0..d121507b 100644 --- a/crosstl/backend/Metal/MetalParser.py +++ b/crosstl/backend/Metal/MetalParser.py @@ -452,7 +452,7 @@ def parse_additive(self): def parse_multiplicative(self): left = self.parse_unary() - while 
self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[1] self.eat(self.current_token[0]) right = self.parse_unary() diff --git a/crosstl/backend/Mojo/MojoLexer.py b/crosstl/backend/Mojo/MojoLexer.py index 355b8e9e..b1395ec3 100644 --- a/crosstl/backend/Mojo/MojoLexer.py +++ b/crosstl/backend/Mojo/MojoLexer.py @@ -50,6 +50,7 @@ ("DOT", r"\."), ("EQUALS", r"="), ("WHITESPACE", r"\s+"), + ("MOD", r"%"), ] # Define keywords specific to mojo diff --git a/crosstl/backend/Mojo/MojoParser.py b/crosstl/backend/Mojo/MojoParser.py index 64cf50eb..e2ac3031 100644 --- a/crosstl/backend/Mojo/MojoParser.py +++ b/crosstl/backend/Mojo/MojoParser.py @@ -460,7 +460,7 @@ def parse_additive(self): def parse_multiplicative(self): left = self.parse_unary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[1] self.eat(self.current_token[0]) right = self.parse_unary() diff --git a/crosstl/backend/Opengl/OpenglParser.py b/crosstl/backend/Opengl/OpenglParser.py index f2d15ecc..370334cb 100644 --- a/crosstl/backend/Opengl/OpenglParser.py +++ b/crosstl/backend/Opengl/OpenglParser.py @@ -712,7 +712,7 @@ def parse_multiplicative(self): """ expr = self.parse_unary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[0] self.eat(op) right = self.parse_unary() diff --git a/crosstl/backend/Vulkan/VulkanLexer.py b/crosstl/backend/Vulkan/VulkanLexer.py index 471ab723..4b2e5d8b 100644 --- a/crosstl/backend/Vulkan/VulkanLexer.py +++ b/crosstl/backend/Vulkan/VulkanLexer.py @@ -36,7 +36,6 @@ ("MINUS", r"-"), ("MULTIPLY", r"\*"), ("DIVIDE", r"/"), - ("MODULUS", r"%"), ("LESS_EQUAL", r"<="), ("GREATER_EQUAL", r">="), ("NOT_EQUAL", r"!="), @@ -50,6 +49,7 @@ ("BINARY_NOT", r"~"), ("QUESTION", r"\?"), ("COLON", r":"), + ("MOD", r"%"), ] KEYWORDS = { diff --git a/crosstl/backend/Vulkan/VulkanParser.py b/crosstl/backend/Vulkan/VulkanParser.py index 8b2247cd..f72d68d0 100644 --- a/crosstl/backend/Vulkan/VulkanParser.py +++ b/crosstl/backend/Vulkan/VulkanParser.py @@ -466,9 +466,9 @@ def parse_primary(self): def parse_multiplicative(self): left = self.parse_primary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: - op = self.current_token[0] - self.eat(op) + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: + op = self.current_token[1] + self.eat(self.current_token[0]) right = self.parse_primary() left = BinaryOpNode(left, op, right) return left diff --git a/crosstl/backend/slang/SlangLexer.py b/crosstl/backend/slang/SlangLexer.py index 13d5a7e6..9d8808b1 100644 --- a/crosstl/backend/slang/SlangLexer.py +++ b/crosstl/backend/slang/SlangLexer.py @@ -72,6 +72,8 @@ ("CONSTEXPR", r"\bconstexpr\b"), ("STATIC", r"\bstatic\b"), ("INLINE", r"\binline\b"), + ("MOD", r"%"), # Add modulus operator + # Add modulus assignment ] # Keywords map for matching identifiers to token types diff --git a/crosstl/backend/slang/SlangParser.py b/crosstl/backend/slang/SlangParser.py index 0e097793..daa5200d 100644 --- a/crosstl/backend/slang/SlangParser.py +++ b/crosstl/backend/slang/SlangParser.py @@ -489,7 +489,7 @@ def parse_additive(self): def parse_multiplicative(self): left = self.parse_unary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[1] self.eat(self.current_token[0]) 
right = self.parse_unary() diff --git a/crosstl/translator/parser.py b/crosstl/translator/parser.py index d9cbccfa..c17f72a5 100644 --- a/crosstl/translator/parser.py +++ b/crosstl/translator/parser.py @@ -653,7 +653,7 @@ def parse_multiplicative(self): """ expr = self.parse_unary() - while self.current_token[0] in ["MULTIPLY", "DIVIDE"]: + while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]: op = self.current_token[0] self.eat(op) right = self.parse_unary() @@ -803,6 +803,7 @@ def parse_expression(self): "BITWISE_AND", "ASSIGN_SHIFT_RIGHT", "ASSIGN_SHIFT_LEFT", + "MOD", ]: op = self.current_token[0] self.eat(op) diff --git a/tests/test_backend/test_directx/test_lexer.py b/tests/test_backend/test_directx/test_lexer.py index aa785b9e..83b3825a 100644 --- a/tests/test_backend/test_directx/test_lexer.py +++ b/tests/test_backend/test_directx/test_lexer.py @@ -250,5 +250,21 @@ def test_double_dtype_tokenization(): pytest.fail("double dtype tokenization is not implemented.") +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_directx/test_parser.py b/tests/test_backend/test_directx/test_parser.py index efb59d2d..9460ca04 100644 --- a/tests/test_backend/test_directx/test_parser.py +++ b/tests/test_backend/test_directx/test_parser.py @@ -355,5 +355,18 @@ def test_double_dtype_parsing(): pytest.fail("double dtype not implemented.") +def test_mod_parsing(): + code = """ + void main() { + int a = 10 % 3; // Basic modulus + } + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_metal/test_lexer.py b/tests/test_backend/test_metal/test_lexer.py index eaaf4b57..e745ca77 100644 --- a/tests/test_backend/test_metal/test_lexer.py +++ b/tests/test_backend/test_metal/test_lexer.py @@ -126,5 +126,21 @@ def test_if_else_tokenization(): pytest.fail("If-else statement tokenization not implemented.") +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_metal/test_parser.py b/tests/test_backend/test_metal/test_parser.py index c30c416f..02286b72 100644 --- a/tests/test_backend/test_metal/test_parser.py +++ b/tests/test_backend/test_metal/test_parser.py @@ -153,5 +153,19 @@ def test_if_else(): pytest.fail("If-else statement parsing not implemented.") +def test_mod_parsing(): + code = """ + fragment float4 fragmentMain() { + int a = 10 % 3; // Basic modulus + return float4(1.0); + } + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_mojo/test_lexer.py b/tests/test_backend/test_mojo/test_lexer.py index 6c249050..e23afcef 100644 --- a/tests/test_backend/test_mojo/test_lexer.py +++ 
b/tests/test_backend/test_mojo/test_lexer.py @@ -9,9 +9,20 @@ def tokenize_code(code: str) -> List: return lexer.tokens -# ToDO: Implement the tests -def test_struct(): - pass +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" if __name__ == "__main__": diff --git a/tests/test_backend/test_mojo/test_parser.py b/tests/test_backend/test_mojo/test_parser.py index 36953cdc..295781b1 100644 --- a/tests/test_backend/test_mojo/test_parser.py +++ b/tests/test_backend/test_mojo/test_parser.py @@ -22,9 +22,16 @@ def tokenize_code(code: str) -> List: return lexer.tokens -# ToDO: Implement the tests -def test_struct(): - pass +def test_mod_parsing(): + code = """ + fn main(): + let a: Int = 10 % 3 # Basic modulus + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") if __name__ == "__main__": diff --git a/tests/test_backend/test_opengl/test_lexer.py b/tests/test_backend/test_opengl/test_lexer.py index fa72f512..a1129290 100644 --- a/tests/test_backend/test_opengl/test_lexer.py +++ b/tests/test_backend/test_opengl/test_lexer.py @@ -123,5 +123,21 @@ def test_double_dtype_tokenization(): pytest.fail("double tokenization not implemented") +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_opengl/test_parser.py b/tests/test_backend/test_opengl/test_parser.py index 661ccc25..79a107b5 100644 --- a/tests/test_backend/test_opengl/test_parser.py +++ b/tests/test_backend/test_opengl/test_parser.py @@ -197,5 +197,18 @@ def test_double_dtype_tokenization(): pytest.fail("double tokenization not implemented") +def test_mod_parsing(): + code = """ + void main() { + int a = 10 % 3; // Basic modulus + } + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_slang/test_lexer.py b/tests/test_backend/test_slang/test_lexer.py index adf8b744..a9bbd989 100644 --- a/tests/test_backend/test_slang/test_lexer.py +++ b/tests/test_backend/test_slang/test_lexer.py @@ -101,5 +101,21 @@ def test_function_call_tokenization(): pytest.fail("Function call tokenization not implemented.") +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_slang/test_parser.py b/tests/test_backend/test_slang/test_parser.py index 34976622..69fd232d 100644 --- a/tests/test_backend/test_slang/test_parser.py +++ b/tests/test_backend/test_slang/test_parser.py @@ -117,5 +117,21 @@ def test_function_call_parsing(): pytest.fail("function call parsing not 
implemented.") +def test_mod_parsing(): + code = """ + [shader("vertex")] + VertexStageOutput vertexMain(AssembledVertex assembledVertex) { + VertexStageOutput output; + int a = 10 % 3; // Basic modulus + return output; + } + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") + + if __name__ == "__main__": pytest.main() diff --git a/tests/test_backend/test_vulkan/test_lexer.py b/tests/test_backend/test_vulkan/test_lexer.py index 82221481..9fabf09a 100644 --- a/tests/test_backend/test_vulkan/test_lexer.py +++ b/tests/test_backend/test_vulkan/test_lexer.py @@ -9,9 +9,20 @@ def tokenize_code(code: str) -> List: return lexer.tokens -# ToDO: Implement the tests -def test_struct(): - pass +def test_mod_tokenization(): + code = """ + int a = 10 % 3; // Basic modulus + """ + tokens = tokenize_code(code) + + # Find the modulus operator in tokens + has_mod = False + for token in tokens: + if token == ("MOD", "%"): + has_mod = True + break + + assert has_mod, "Modulus operator (%) not tokenized correctly" if __name__ == "__main__": diff --git a/tests/test_backend/test_vulkan/test_parser.py b/tests/test_backend/test_vulkan/test_parser.py index 736e78ba..d1af630c 100644 --- a/tests/test_backend/test_vulkan/test_parser.py +++ b/tests/test_backend/test_vulkan/test_parser.py @@ -22,9 +22,18 @@ def tokenize_code(code: str) -> List: return lexer.tokens -# ToDO: Implement the tests -def test_struct(): - pass +def test_mod_parsing(): + code = """ + + void main() { + int a = 10 % 3; // Basic modulus + } + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operator parsing not implemented") if __name__ == "__main__": diff --git a/tests/test_translator/test_parser.py b/tests/test_translator/test_parser.py index 8f498322..8f3ca4a9 100644 --- a/tests/test_translator/test_parser.py +++ b/tests/test_translator/test_parser.py @@ -586,5 +586,40 @@ def test_and_operator(): pytest.fail("Bitwise AND not working") +def test_modulus_operations(): + code = """ + shader main { + struct VSInput { + vec2 texCoord @ TEXCOORD0; + }; + struct VSOutput { + vec4 color @ COLOR; + }; + sampler2D iChannel0; + vertex { + VSOutput main(VSInput input) { + VSOutput output; + // Test modulus operations + int value = 1200; + value = value % 10; // Basic modulus + value %= 5; // Modulus assignment + output.color = vec4(float(value) / 10.0, 0.0, 0.0, 1.0); + return output; + } + } + fragment { + vec4 main(VSOutput input) @ gl_FragColor { + return vec4(input.color.rgb, 1.0); + } + } +} + """ + try: + tokens = tokenize_code(code) + parse_code(tokens) + except SyntaxError: + pytest.fail("Modulus operations not working") + + if __name__ == "__main__": pytest.main() From f2e0cabcb60ee2c20452aa89e58a12467d7111aa Mon Sep 17 00:00:00 2001 From: sahithi aele Date: Fri, 3 Jan 2025 20:43:45 +0530 Subject: [PATCH 02/13] Fix Incorrect Regular Expression Patterns for WHILE and DO Tokens in DirectxLexer (#251) * fix: resolved token matching order * fix: correct regex patterns for WHILE and DO tokens --- crosstl/backend/DirectX/DirectxLexer.py | 4 ++-- crosstl/translator/lexer.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crosstl/backend/DirectX/DirectxLexer.py b/crosstl/backend/DirectX/DirectxLexer.py index 367549e8..88eadcce 100644 --- a/crosstl/backend/DirectX/DirectxLexer.py +++ b/crosstl/backend/DirectX/DirectxLexer.py @@ -21,8 +21,8 @@ ("ELSE_IF", r"\belse\sif\b"), ("ELSE", 
r"\belse\b"), ("FOR", r"\bfor\b"), - ("WHILE", r"\b\while\b"), - ("DO", r"\b\do\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), ("REGISTER", r"\bregister\b"), ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), ("NUMBER", r"\d+(\.\d+)?"), diff --git a/crosstl/translator/lexer.py b/crosstl/translator/lexer.py index ac89f534..21929417 100644 --- a/crosstl/translator/lexer.py +++ b/crosstl/translator/lexer.py @@ -1,6 +1,7 @@ import re from collections import OrderedDict + TOKENS = OrderedDict( [ ("COMMENT_SINGLE", r"//.*"), @@ -78,6 +79,7 @@ ] ) + KEYWORDS = { "shader": "SHADER", "void": "VOID", From cfd399eb752ab2fb60a58dd6cc5317e2fe8ffd64 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 15:47:47 +0000 Subject: [PATCH 03/13] Chore: Add GitHub Actions workflow to merge main into PR branches --- .github/workflows/PR_merge_main.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/PR_merge_main.yml diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml new file mode 100644 index 00000000..3266a4f1 --- /dev/null +++ b/.github/workflows/PR_merge_main.yml @@ -0,0 +1,23 @@ +name: PR Merge Main +on: + pull_request: + +jobs: + update-pr: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Merge main into PR branch + run: | + git fetch origin main + git checkout ${{ github.head_ref }} + git merge origin/main + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Push changes + run: | + git push origin ${{ github.head_ref }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From f933145e748312f8576c7c8354680a2294e7c5c2 Mon Sep 17 00:00:00 2001 From: Inanna <95119297+InannaxX07@users.noreply.github.com> Date: Fri, 3 Jan 2025 22:31:59 +0530 Subject: [PATCH 04/13] Optimize HLSL Lexer Performance (#249) * Update DirectxLexer.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix the tests * Fix tokenization method in HLSLLexer to use tokenize() instead of tokens * test new merge action * Fix GitHub Actions workflow to handle branch names correctly during push * Refactor GitHub Actions workflow to fetch and merge PR branch with main * Update GitHub Actions workflow to use ACCESS_TOKEN instead of GITHUB_TOKEN for pushing changes * Update GitHub Actions workflow to use CROSSGL_TOKEN for authentication * Update GitHub Actions workflow to push changes using CROSSGL_TOKEN for authentication * Revert changes to workflow, will fix in different PR --------- Co-authored-by: Nripesh Niketan --- crosstl/backend/DirectX/DirectxLexer.py | 209 ++++++++++-------- .../test_backend/test_directx/test_codegen.py | 2 +- tests/test_backend/test_directx/test_lexer.py | 2 +- .../test_backend/test_directx/test_parser.py | 2 +- 4 files changed, 117 insertions(+), 98 deletions(-) diff --git a/crosstl/backend/DirectX/DirectxLexer.py b/crosstl/backend/DirectX/DirectxLexer.py index 88eadcce..a85cc9de 100644 --- a/crosstl/backend/DirectX/DirectxLexer.py +++ b/crosstl/backend/DirectX/DirectxLexer.py @@ -1,75 +1,11 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("INCLUDE", r"\#include\b"), - ("STRUCT", r"\bstruct\b"), - ("CBUFFER", r"\bcbuffer\b"), - ("TEXTURE2D", r"\bTexture2D\b"), - ("SAMPLER_STATE", r"\bSamplerState\b"), - ("FVECTOR", r"\bfloat[2-4]\b"), - ("FLOAT", r"\bfloat\b"), - ("DOUBLE", r"\bdouble\b"), - ("INT", r"\bint\b"), - ("UINT", r"\buint\b"), - 
("BOOL", r"\bbool\b"), - ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE_IF", r"\belse\sif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("REGISTER", r"\bregister\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("COMMA", r","), - ("COLON", r":"), - ("QUESTION", r"\?"), - ("SHIFT_LEFT", r"<<"), - ("SHIFT_RIGHT", r">>"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("ASSIGN_XOR", r"\^="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_AND", r"\&="), - ("BITWISE_XOR", r"\^"), - ("LOGICAL_AND", r"&&"), - ("LOGICAL_OR", r"\|\|"), - ("BITWISE_OR", r"\|"), - ("DOT", r"\."), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("EQUALS", r"="), - ("WHITESPACE", r"\s+"), - ("STRING", r"\"[^\"]*\""), - ("SWITCH", r"\bswitch\b"), - ("CASE", r"\bcase\b"), - ("DEFAULT", r"\bdefault\b"), - ("BREAK", r"\bbreak\b"), - ("MOD", r"%"), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +# define keywords dictionary KEYWORDS = { "struct": "STRUCT", "cbuffer": "CBUFFER", @@ -97,38 +33,121 @@ "break": "BREAK", } +# use tuple for immutable token types that won't change +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("INCLUDE", r"\#include\b"), + ("STRUCT", r"\bstruct\b"), + ("CBUFFER", r"\bcbuffer\b"), + ("TEXTURE2D", r"\bTexture2D\b"), + ("SAMPLER_STATE", r"\bSamplerState\b"), + ("FVECTOR", r"\bfloat[2-4]\b"), + ("FLOAT", r"\bfloat\b"), + ("DOUBLE", r"\bdouble\b"), + ("INT", r"\bint\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE_IF", r"\belse\sif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("REGISTER", r"\bregister\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("COMMA", r","), + ("COLON", r":"), + ("QUESTION", r"\?"), + ("SHIFT_LEFT", r"<<"), + ("SHIFT_RIGHT", r">>"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("ASSIGN_XOR", r"\^="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_AND", r"\&="), + ("BITWISE_XOR", r"\^"), + ("LOGICAL_AND", r"&&"), + ("LOGICAL_OR", r"\|\|"), + ("BITWISE_OR", r"\|"), + ("DOT", r"\."), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("EQUALS", r"="), + ("WHITESPACE", r"\s+"), + ("STRING", r"\"[^\"]*\""), + ("SWITCH", r"\bswitch\b"), + ("CASE", r"\bcase\b"), + ("DEFAULT", r"\bdefault\b"), + ("BREAK", r"\bbreak\b"), + ("MOD", r"%"), + ] +) + class HLSLLexer: - def __init__(self, code): + def __init__(self, code: str): + 
self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) - def tokenize(self): - pos = 0 - while pos < len(self.code): - match = None + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time + pos = 0 + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None - self.tokens.append(("EOF", "")) + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "HLSLLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/tests/test_backend/test_directx/test_codegen.py b/tests/test_backend/test_directx/test_codegen.py index 199c4b89..505a08f0 100644 --- a/tests/test_backend/test_directx/test_codegen.py +++ b/tests/test_backend/test_directx/test_codegen.py @@ -19,7 +19,7 @@ def generate_code(ast_node): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = HLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_directx/test_lexer.py b/tests/test_backend/test_directx/test_lexer.py index 83b3825a..85424921 100644 --- a/tests/test_backend/test_directx/test_lexer.py +++ b/tests/test_backend/test_directx/test_lexer.py @@ -6,7 +6,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = HLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def test_struct_tokenization(): diff --git a/tests/test_backend/test_directx/test_parser.py b/tests/test_backend/test_directx/test_parser.py index 9460ca04..1128734c 100644 --- a/tests/test_backend/test_directx/test_parser.py +++ b/tests/test_backend/test_directx/test_parser.py @@ -19,7 +19,7 @@ def parse_code(tokens: List): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = HLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def test_struct_parsing(): From 782a7faf3e60f9c41b5c7b5e8543c37204123cbf Mon Sep 17 00:00:00 2001 From: Nripesh Niketan <86844847+NripeshN@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:27:17 +0000 Subject: [PATCH 05/13] Fix: fix PR merge into main workflow --- .github/workflows/PR_merge_main.yml | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 
deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index 3266a4f1..1be76236 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -8,16 +8,27 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Merge main into PR branch + - name: Fetch PR branch and main run: | + # Fetch the PR branch and the main branch + git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-branch git fetch origin main - git checkout ${{ github.head_ref }} + + - name: Set git user + run: | + git config --global user.email "${{secrets.OWNER_EMAIL}}" + git config --global user.name "Nripesh Niketan" + + - name: Merge main into PR branch + run: | + # Checkout the PR branch + git checkout pr-branch + # Merge the main branch into the PR branch git merge origin/main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Push changes run: | - git push origin ${{ github.head_ref }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # Push the updated PR branch back to the forked repository + git push https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} From aacfd3678030ebac2637fd75878d52212e637d32 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan <86844847+NripeshN@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:58:13 +0000 Subject: [PATCH 06/13] Chore: Optimize backend Lexer Performance (#253) * Chore: Optimize backend Lexer Performance * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Refactor: Update tokenization method in Metal and Mojo tests * Refactor: Improve tokenization process in OpenGL lexer for better performance and clarity * Chore: Update GitHub Actions workflow to fetch and merge main into PR branches * Chore: Update GitHub Actions workflow to use token-based push for PR branches * Chore: Update GitHub Actions workflow to use GITHUB_TOKEN for pushing changes * Revert changes to yml * Chore: Update PR merge workflow to fetch and merge branches more efficiently * Chore: Update GitHub Actions workflow to use OWNER_TOKEN for pushing changes * Chore: Update PR merge workflow to use GITHUB_TOKEN for pushing changes * Chore: Move git user configuration to the appropriate step in PR merge workflow * Chore: Update git user configuration to use secrets for email and change name * Refactor: Improve SlangLexer tokenization process and update related tests * Refactor: Enhance VulkanLexer tokenization with generator and improve performance --- crosstl/backend/Metal/MetalLexer.py | 200 ++++++++-------- crosstl/backend/Mojo/MojoLexer.py | 170 +++++++------- crosstl/backend/Opengl/OpenglLexer.py | 213 +++++++++-------- crosstl/backend/Vulkan/VulkanLexer.py | 176 +++++++------- crosstl/backend/slang/SlangLexer.py | 214 ++++++++++-------- tests/test_backend/test_metal/test_codegen.py | 2 +- tests/test_backend/test_metal/test_lexer.py | 2 +- tests/test_backend/test_metal/test_parser.py | 2 +- tests/test_backend/test_mojo/test_codegen.py | 2 +- tests/test_backend/test_mojo/test_lexer.py | 2 +- tests/test_backend/test_mojo/test_parser.py | 2 +- .../test_backend/test_opengl/test_codegen.py | 2 +- tests/test_backend/test_opengl/test_lexer.py | 2 +- tests/test_backend/test_opengl/test_parser.py | 2 +- tests/test_backend/test_slang/test_codegen.py | 2 +- tests/test_backend/test_slang/test_lexer.py | 2 +- tests/test_backend/test_slang/test_parser.py | 2 +- 
.../test_backend/test_vulkan/test_codegen.py | 2 +- tests/test_backend/test_vulkan/test_lexer.py | 2 +- tests/test_backend/test_vulkan/test_parser.py | 2 +- 20 files changed, 543 insertions(+), 460 deletions(-) diff --git a/crosstl/backend/Metal/MetalLexer.py b/crosstl/backend/Metal/MetalLexer.py index 8c0967dd..963b22c8 100644 --- a/crosstl/backend/Metal/MetalLexer.py +++ b/crosstl/backend/Metal/MetalLexer.py @@ -1,72 +1,78 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("PREPROCESSOR", r"#\w+"), - ("STRUCT", r"\bstruct\b"), - ("CONSTANT", r"\bconstant\b"), - ("TEXTURE2D", r"\btexture2d\b"), - ("buffer", r"\bbuffer\b"), - ("SAMPLER", r"\bsampler\b"), - ("VECTOR", r"\b(float|half|int|uint)[2-4]\b"), - ("FLOAT", r"\bfloat\b"), - ("HALF", r"\bhalf\b"), - ("INT", r"\bint\b"), - ("UINT", r"\buint\b"), - ("QUESTION", r"\?"), - ("BOOL", r"\bbool\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("ELSE_IF", r"\belse\s+if\b"), - ("IF", r"\bif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("KERNEL", r"\bkernel\b"), - ("VERTEX", r"\bvertex\b"), - ("FRAGMENT", r"\bfragment\b"), - ("USING", r"\busing\b"), - ("NAMESPACE", r"\bnamespace\b"), - ("METAL", r"\bmetal\b"), - ("DEVICE", r"\bdevice\b"), - ("THREADGROUP", r"\bthreadgroup\b"), - ("THREAD", r"\bthread\b"), - ("ATTRIBUTE", r"\[\[.*?\]\]"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("STRING", r'"[^"]*"'), - ("COMMA", r","), - ("COLON", r":"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("AND", r"&&"), - ("OR", r"\|\|"), - ("DOT", r"\."), - ("EQUALS", r"="), - ("bitwise_and", r"&"), - ("WHITESPACE", r"\s+"), - ("MOD", r"%"), - ("ASSIGN_MOD", r"%="), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("PREPROCESSOR", r"#\w+"), + ("STRUCT", r"\bstruct\b"), + ("CONSTANT", r"\bconstant\b"), + ("TEXTURE2D", r"\btexture2d\b"), + ("buffer", r"\bbuffer\b"), + ("SAMPLER", r"\bsampler\b"), + ("VECTOR", r"\b(float|half|int|uint)[2-4]\b"), + ("FLOAT", r"\bfloat\b"), + ("HALF", r"\bhalf\b"), + ("INT", r"\bint\b"), + ("UINT", r"\buint\b"), + ("QUESTION", r"\?"), + ("BOOL", r"\bbool\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("ELSE_IF", r"\belse\s+if\b"), + ("IF", r"\bif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("KERNEL", r"\bkernel\b"), + ("VERTEX", r"\bvertex\b"), + ("FRAGMENT", r"\bfragment\b"), + ("USING", r"\busing\b"), + ("NAMESPACE", r"\bnamespace\b"), + ("METAL", r"\bmetal\b"), + ("DEVICE", r"\bdevice\b"), + ("THREADGROUP", r"\bthreadgroup\b"), + ("THREAD", r"\bthread\b"), + ("ATTRIBUTE", r"\[\[.*?\]\]"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("STRING", r'"[^"]*"'), + ("COMMA", r","), + ("COLON", r":"), + 
("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("AND", r"&&"), + ("OR", r"\|\|"), + ("DOT", r"\."), + ("EQUALS", r"="), + ("bitwise_and", r"&"), + ("WHITESPACE", r"\s+"), + ("MOD", r"%"), + ("ASSIGN_MOD", r"%="), + ] +) KEYWORDS = { "struct": "STRUCT", @@ -97,34 +103,46 @@ class MetalLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) + + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - def tokenize(self): + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None - self.tokens.append(("EOF", "")) + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "MetalLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/crosstl/backend/Mojo/MojoLexer.py b/crosstl/backend/Mojo/MojoLexer.py index b1395ec3..4faa8f3a 100644 --- a/crosstl/backend/Mojo/MojoLexer.py +++ b/crosstl/backend/Mojo/MojoLexer.py @@ -1,57 +1,63 @@ import re +from typing import Iterator, Tuple, List + +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"#.*"), + ("COMMENT_MULTI", r'"""[\s\S]*?"""'), + ("STRUCT", r"\bstruct\b"), + ("LET", r"\blet\b"), + ("VAR", r"\bvar\b"), + ("FN", r"\bfn\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("IMPORT", r"\bimport\b"), + ("DEF", r"\bdef\b"), + ("INT", r"\bInt\b"), + ("FLOAT", r"\bFloat\b"), + ("BOOL", r"\bBool\b"), + ("STRING", r"\bString\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("STRING_LITERAL", r'"[^"]*"'), + 
("COMMA", r","), + ("COLON", r":"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("AND", r"&&"), + ("OR", r"\|\|"), + ("DOT", r"\."), + ("EQUALS", r"="), + ("WHITESPACE", r"\s+"), + ("MOD", r"%"), + ] +) -# Define the tokens for Mojo syntax -TOKENS = [ - ("COMMENT_SINGLE", r"#.*"), - ("COMMENT_MULTI", r'"""[\s\S]*?"""'), - ("STRUCT", r"\bstruct\b"), - ("LET", r"\blet\b"), - ("VAR", r"\bvar\b"), - ("FN", r"\bfn\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("IMPORT", r"\bimport\b"), - ("DEF", r"\bdef\b"), - ("INT", r"\bInt\b"), - ("FLOAT", r"\bFloat\b"), - ("BOOL", r"\bBool\b"), - ("STRING", r"\bString\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("STRING_LITERAL", r'"[^"]*"'), - ("COMMA", r","), - ("COLON", r":"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("AND", r"&&"), - ("OR", r"\|\|"), - ("DOT", r"\."), - ("EQUALS", r"="), - ("WHITESPACE", r"\s+"), - ("MOD", r"%"), -] # Define keywords specific to mojo KEYWORDS = { @@ -74,34 +80,46 @@ class MojoLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) - def tokenize(self): + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) + + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None - self.tokens.append(("EOF", "")) + @classmethod + def from_file(cls, filepath: str, 
chunk_size: int = 8192) -> "MojoLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/crosstl/backend/Opengl/OpenglLexer.py b/crosstl/backend/Opengl/OpenglLexer.py index 5a1a18af..41f8f9f5 100644 --- a/crosstl/backend/Opengl/OpenglLexer.py +++ b/crosstl/backend/Opengl/OpenglLexer.py @@ -1,78 +1,84 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("VERSION", r"#version"), - ("PREPROCESSOR", r"#\w+"), - ("CONSTANT", r"\bconst\b"), - ("STRUCT", r"\bstruct\b"), - ("UNIFORM", r"\buniform\b"), - ("SAMPLER2D", r"\bsampler2D\b"), - ("SAMPLERCUBE", r"\bsamplerCube\b"), - ("BUFFER", r"\bbuffer\b"), - ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), - ("MATRIX", r"\bmat[234](x[234])?\b"), - ("FLOAT", r"\bfloat\b"), - ("INT", r"\bint\b"), - ("DOUBLE", r"\bdouble\b"), - ("UINT", r"\buint\b"), - ("BOOL", r"\bbool\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("IN", r"\bin\b"), - ("OUT", r"\bout\b"), - ("INOUT", r"\binout\b"), - ("LAYOUT", r"\blayout\b"), - ("ATTRIBUTE", r"\battribute\b"), - ("VARYING", r"\bvarying\b"), - ("CONST", r"\bconst\b"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("STRING", r'"[^"]*"'), - ("COMMA", r","), - ("COLON", r":"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("ASSIGN_AND", r"&="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_XOR", r"\^="), - ("LOGICAL_AND", r"&&"), - ("LOGICAL_OR", r"\|\|"), - ("ASSIGN_MOD", r"%="), - ("MOD", r"%"), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("DOT", r"\."), - ("EQUALS", r"="), - ("BITWISE_AND", r"&"), - ("BITWISE_OR", r"\|"), - ("BITWISE_XOR", r"\^"), - ("BITWISE_NOT", r"~"), - ("WHITESPACE", r"\s+"), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("VERSION", r"#version"), + ("PREPROCESSOR", r"#\w+"), + ("CONSTANT", r"\bconst\b"), + ("STRUCT", r"\bstruct\b"), + ("UNIFORM", r"\buniform\b"), + ("SAMPLER2D", r"\bsampler2D\b"), + ("SAMPLERCUBE", r"\bsamplerCube\b"), + ("BUFFER", r"\bbuffer\b"), + ("VECTOR", r"\b(vec|ivec|uvec|bvec)[234]\b"), + ("MATRIX", r"\bmat[234](x[234])?\b"), + ("FLOAT", r"\bfloat\b"), + ("INT", r"\bint\b"), + ("DOUBLE", r"\bdouble\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("IN", r"\bin\b"), + ("OUT", r"\bout\b"), + ("INOUT", r"\binout\b"), + ("LAYOUT", r"\blayout\b"), + ("ATTRIBUTE", r"\battribute\b"), + ("VARYING", r"\bvarying\b"), + ("CONST", r"\bconst\b"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?([eE][+-]?\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), 
+ ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("STRING", r'"[^"]*"'), + ("COMMA", r","), + ("COLON", r":"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("ASSIGN_AND", r"&="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_XOR", r"\^="), + ("LOGICAL_AND", r"&&"), + ("LOGICAL_OR", r"\|\|"), + ("ASSIGN_MOD", r"%="), + ("MOD", r"%"), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("DOT", r"\."), + ("EQUALS", r"="), + ("BITWISE_AND", r"&"), + ("BITWISE_OR", r"\|"), + ("BITWISE_XOR", r"\^"), + ("BITWISE_NOT", r"~"), + ("WHITESPACE", r"\s+"), + ] +) KEYWORDS = { "struct": "STRUCT", @@ -103,33 +109,46 @@ class GLSLLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) + + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - def tokenize(self): + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) - self.tokens.append(("EOF", "")) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None + + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "GLSLLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/crosstl/backend/Vulkan/VulkanLexer.py b/crosstl/backend/Vulkan/VulkanLexer.py index 4b2e5d8b..b89f4d58 100644 --- a/crosstl/backend/Vulkan/VulkanLexer.py +++ b/crosstl/backend/Vulkan/VulkanLexer.py @@ -1,56 +1,62 @@ import re +from typing import Iterator, Tuple, List -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("WHITESPACE", r"\s+"), - ("SEMANTIC", r":\w+"), - ("PRE_INCREMENT", r"\+\+(?=\w)"), - ("PRE_DECREMENT", r"--(?=\w)"), - ("POST_INCREMENT", r"(?<=\w)\+\+"), - ("POST_DECREMENT", r"(?<=\w)--"), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d*)?|\.\d+"), - ("SEMICOLON", r";"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("COMMA", r","), - ("DOT", r"\."), - ("EQUAL", r"=="), - ("ASSIGN_AND", 
r"&="), - ("ASSIGN_OR", r"\|="), - ("ASSIGN_XOR", r"\^="), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("ASSIGN_MOD", r"%="), - ("ASSIGN_SHIFT_LEFT", r"<<="), - ("ASSIGN_SHIFT_RIGHT", r">>="), - ("BITWISE_SHIFT_LEFT", r"<<"), - ("BITWISE_SHIFT_RIGHT", r">>"), - ("EQUALS", r"="), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("NOT_EQUAL", r"!="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("AND", r"&&"), - ("OR", r"\|\|"), - ("BINARY_AND", r"&"), - ("BINARY_OR", r"\|"), - ("BINARY_XOR", r"\^"), - ("BINARY_NOT", r"~"), - ("QUESTION", r"\?"), - ("COLON", r":"), - ("MOD", r"%"), -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("WHITESPACE", r"\s+"), + ("SEMANTIC", r":\w+"), + ("PRE_INCREMENT", r"\+\+(?=\w)"), + ("PRE_DECREMENT", r"--(?=\w)"), + ("POST_INCREMENT", r"(?<=\w)\+\+"), + ("POST_DECREMENT", r"(?<=\w)--"), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d*)?|\.\d+"), + ("SEMICOLON", r";"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("COMMA", r","), + ("DOT", r"\."), + ("EQUAL", r"=="), + ("ASSIGN_AND", r"&="), + ("ASSIGN_OR", r"\|="), + ("ASSIGN_XOR", r"\^="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("ASSIGN_MOD", r"%="), + ("ASSIGN_SHIFT_LEFT", r"<<="), + ("ASSIGN_SHIFT_RIGHT", r">>="), + ("BITWISE_SHIFT_LEFT", r"<<"), + ("BITWISE_SHIFT_RIGHT", r">>"), + ("EQUALS", r"="), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("NOT_EQUAL", r"!="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("AND", r"&&"), + ("OR", r"\|\|"), + ("BINARY_AND", r"&"), + ("BINARY_OR", r"\|"), + ("BINARY_XOR", r"\^"), + ("BINARY_NOT", r"~"), + ("QUESTION", r"\?"), + ("COLON", r":"), + ("MOD", r"%"), + ] +) KEYWORDS = { "struct": "STRUCT", @@ -138,40 +144,46 @@ class VulkanLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) + + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - def tokenize(self): + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type == "VERSION": - self.tokens.append((token_type, text)) - elif token_type == "VERSION_NUMBER": - self.tokens.append((token_type, text)) - elif token_type == "CORE": - self.tokens.append((token_type, text)) - elif token_type != "WHITESPACE": # Ignore whitespace tokens - token = (token_type, text) - self.tokens.append(token) - pos = match.end(0) - break - if not match: - unmatched_char = self.code[pos] - highlighted_code = ( - self.code[:pos] + "[" + self.code[pos] + "]" + self.code[pos + 1 :] - ) + while pos < self._length: + 
token = self._next_token(pos) + if token is None: raise SyntaxError( - f"Illegal character '{unmatched_char}' at position {pos}\n{highlighted_code}" + f"Illegal character '{self.code[pos]}' at position {pos}" ) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return match.end(0), token_type, match.group(0) + return None - self.tokens.append(("EOF", None)) + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "VulkanLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/crosstl/backend/slang/SlangLexer.py b/crosstl/backend/slang/SlangLexer.py index 9d8808b1..44dc39f4 100644 --- a/crosstl/backend/slang/SlangLexer.py +++ b/crosstl/backend/slang/SlangLexer.py @@ -1,80 +1,83 @@ import re +from typing import Iterator, Tuple, List -# Token definitions -TOKENS = [ - ("COMMENT_SINGLE", r"//.*"), - ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), - ("STRUCT", r"\bstruct\b"), - ("CBUFFER", r"\bcbuffer\b"), - ("TYPE_SHADER", r'\[shader\("(vertex|fragment|compute)"\)\]'), - ("SHADER", r"\bshader\b"), - ("STRING", r'"(?:\\.|[^"\\])*"'), - ("TEXTURE2D", r"\bTexture2D\b"), - ("SAMPLER_STATE", r"\bSamplerState\b"), - ("FVECTOR", r"\bfloat[2-4]\b"), - ("FLOAT", r"\bfloat\b"), - ("INT", r"\bint\b"), - ("UINT", r"\buint\b"), - ("BOOL", r"\bbool\b"), - ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), - ("VOID", r"\bvoid\b"), - ("RETURN", r"\breturn\b"), - ("IF", r"\bif\b"), - ("ELSE_IF", r"\belse\s+if\b"), - ("ELSE", r"\belse\b"), - ("FOR", r"\bfor\b"), - ("WHILE", r"\bwhile\b"), - ("DO", r"\bdo\b"), - ("SWITCH", r"\bswitch\b"), - ("CASE", r"\bcase\b"), - ("DEFAULT", r"\bdefault\b"), - ("BREAK", r"\bbreak\b"), - ("CONTINUE", r"\bcontinue\b"), - ("REGISTER", r"\bregister\b"), - ("STRING", r'"[^"]*"'), - ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), - ("NUMBER", r"\d+(\.\d+)?"), - ("LBRACE", r"\{"), - ("RBRACE", r"\}"), - ("LPAREN", r"\("), - ("RPAREN", r"\)"), - ("LBRACKET", r"\["), - ("RBRACKET", r"\]"), - ("SEMICOLON", r";"), - ("COMMA", r","), - ("COLON", r":"), # Separate token for single colon, if needed separately - ("QUESTION", r"\?"), - ("LESS_EQUAL", r"<="), - ("GREATER_EQUAL", r">="), - ("LESS_THAN", r"<"), - ("GREATER_THAN", r">"), - ("EQUAL", r"=="), - ("NOT_EQUAL", r"!="), - ("PLUS_EQUALS", r"\+="), - ("MINUS_EQUALS", r"-="), - ("MULTIPLY_EQUALS", r"\*="), - ("DIVIDE_EQUALS", r"/="), - ("AND", r"&&"), - ("OR", r"\|\|"), - ("DOT", r"\."), - ("MULTIPLY", r"\*"), - ("DIVIDE", r"/"), - ("PLUS", r"\+"), - ("MINUS", r"-"), - ("EQUALS", r"="), - ("WHITESPACE", r"\s+"), - # Slang-specific tokens - ("IMPORT", r"\bimport\b"), - ("EXPORT", r"\bexport\b"), - ("GENERIC", r"\b__generic\b"), - ("EXTENSION", r"\bextension\b"), - ("TYPEDEF", r"\btypedef\b"), - ("CONSTEXPR", r"\bconstexpr\b"), - ("STATIC", r"\bstatic\b"), - ("INLINE", r"\binline\b"), - ("MOD", r"%"), # Add modulus operator - # Add modulus assignment -] +# using sets for faster lookup +SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"} + +TOKENS = tuple( + [ + ("COMMENT_SINGLE", r"//.*"), + ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"), + ("STRUCT", r"\bstruct\b"), + 
("CBUFFER", r"\bcbuffer\b"), + ("TYPE_SHADER", r'\[shader\("(vertex|fragment|compute)"\)\]'), + ("SHADER", r"\bshader\b"), + ("STRING", r'"(?:\\.|[^"\\])*"'), + ("TEXTURE2D", r"\bTexture2D\b"), + ("SAMPLER_STATE", r"\bSamplerState\b"), + ("FVECTOR", r"\bfloat[2-4]\b"), + ("FLOAT", r"\bfloat\b"), + ("INT", r"\bint\b"), + ("UINT", r"\buint\b"), + ("BOOL", r"\bbool\b"), + ("MATRIX", r"\bfloat[2-4]x[2-4]\b"), + ("VOID", r"\bvoid\b"), + ("RETURN", r"\breturn\b"), + ("IF", r"\bif\b"), + ("ELSE_IF", r"\belse\s+if\b"), + ("ELSE", r"\belse\b"), + ("FOR", r"\bfor\b"), + ("WHILE", r"\bwhile\b"), + ("DO", r"\bdo\b"), + ("SWITCH", r"\bswitch\b"), + ("CASE", r"\bcase\b"), + ("DEFAULT", r"\bdefault\b"), + ("BREAK", r"\bbreak\b"), + ("CONTINUE", r"\bcontinue\b"), + ("REGISTER", r"\bregister\b"), + ("STRING", r'"[^"]*"'), + ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"), + ("NUMBER", r"\d+(\.\d+)?"), + ("LBRACE", r"\{"), + ("RBRACE", r"\}"), + ("LPAREN", r"\("), + ("RPAREN", r"\)"), + ("LBRACKET", r"\["), + ("RBRACKET", r"\]"), + ("SEMICOLON", r";"), + ("COMMA", r","), + ("COLON", r":"), + ("QUESTION", r"\?"), + ("LESS_EQUAL", r"<="), + ("GREATER_EQUAL", r">="), + ("LESS_THAN", r"<"), + ("GREATER_THAN", r">"), + ("EQUAL", r"=="), + ("NOT_EQUAL", r"!="), + ("PLUS_EQUALS", r"\+="), + ("MINUS_EQUALS", r"-="), + ("MULTIPLY_EQUALS", r"\*="), + ("DIVIDE_EQUALS", r"/="), + ("AND", r"&&"), + ("OR", r"\|\|"), + ("DOT", r"\."), + ("MULTIPLY", r"\*"), + ("DIVIDE", r"/"), + ("PLUS", r"\+"), + ("MINUS", r"-"), + ("EQUALS", r"="), + ("WHITESPACE", r"\s+"), + ("IMPORT", r"\bimport\b"), + ("EXPORT", r"\bexport\b"), + ("GENERIC", r"\b__generic\b"), + ("EXTENSION", r"\bextension\b"), + ("TYPEDEF", r"\btypedef\b"), + ("CONSTEXPR", r"\bconstexpr\b"), + ("STATIC", r"\bstatic\b"), + ("INLINE", r"\binline\b"), + ("MOD", r"%"), + ] +) # Keywords map for matching identifiers to token types KEYWORDS = { @@ -114,33 +117,46 @@ class SlangLexer: - def __init__(self, code): + def __init__(self, code: str): + self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS] self.code = code - self.tokens = [] - self.tokenize() + self._length = len(code) + + def tokenize(self) -> List[Tuple[str, str]]: + # tokenize the input code and return list of tokens + return list(self.token_generator()) - def tokenize(self): + def token_generator(self) -> Iterator[Tuple[str, str]]: + # function that yields tokens one at a time pos = 0 - while pos < len(self.code): - match = None - for token_type, pattern in TOKENS: - regex = re.compile(pattern) - match = regex.match(self.code, pos) - if match: - text = match.group(0) - if token_type == "IDENTIFIER" and text in KEYWORDS: - token_type = KEYWORDS[text] - if token_type not in [ - "WHITESPACE", - "COMMENT_SINGLE", - "COMMENT_MULTI", - ]: - token = (token_type, text.strip()) - self.tokens.append(token) - pos = match.end(0) - break - if not match: + while pos < self._length: + token = self._next_token(pos) + if token is None: raise SyntaxError( f"Illegal character '{self.code[pos]}' at position {pos}" ) - self.tokens.append(("EOF", "")) + new_pos, token_type, text = token + + if token_type == "IDENTIFIER" and text in KEYWORDS: + token_type = KEYWORDS[text] + + if token_type not in SKIP_TOKENS: + yield (token_type, text) + + pos = new_pos + + yield ("EOF", "") + + def _next_token(self, pos: int) -> Tuple[int, str, str]: + # find the next token starting at the given position + for token_type, pattern in self._token_patterns: + match = pattern.match(self.code, pos) + if match: + return 
match.end(0), token_type, match.group(0) + return None + + @classmethod + def from_file(cls, filepath: str, chunk_size: int = 8192) -> "SlangLexer": + # create a lexer instance from a file, reading in chunks + with open(filepath, "r") as f: + return cls(f.read()) diff --git a/tests/test_backend/test_metal/test_codegen.py b/tests/test_backend/test_metal/test_codegen.py index 72e4f74d..12a0426a 100644 --- a/tests/test_backend/test_metal/test_codegen.py +++ b/tests/test_backend/test_metal/test_codegen.py @@ -8,7 +8,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MetalLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_metal/test_lexer.py b/tests/test_backend/test_metal/test_lexer.py index e745ca77..8ea7b838 100644 --- a/tests/test_backend/test_metal/test_lexer.py +++ b/tests/test_backend/test_metal/test_lexer.py @@ -6,7 +6,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MetalLexer(code) - return lexer.tokens + return lexer.tokenize() def test_struct_tokenization(): diff --git a/tests/test_backend/test_metal/test_parser.py b/tests/test_backend/test_metal/test_parser.py index 02286b72..15b2f6ff 100644 --- a/tests/test_backend/test_metal/test_parser.py +++ b/tests/test_backend/test_metal/test_parser.py @@ -7,7 +7,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MetalLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_mojo/test_codegen.py b/tests/test_backend/test_mojo/test_codegen.py index 173f8175..7ca911b5 100644 --- a/tests/test_backend/test_mojo/test_codegen.py +++ b/tests/test_backend/test_mojo/test_codegen.py @@ -16,7 +16,7 @@ def generate_code(ast_node): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MojoLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_mojo/test_lexer.py b/tests/test_backend/test_mojo/test_lexer.py index e23afcef..95f7d18b 100644 --- a/tests/test_backend/test_mojo/test_lexer.py +++ b/tests/test_backend/test_mojo/test_lexer.py @@ -6,7 +6,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MojoLexer(code) - return lexer.tokens + return lexer.tokenize() def test_mod_tokenization(): diff --git a/tests/test_backend/test_mojo/test_parser.py b/tests/test_backend/test_mojo/test_parser.py index 295781b1..4759ac1a 100644 --- a/tests/test_backend/test_mojo/test_parser.py +++ b/tests/test_backend/test_mojo/test_parser.py @@ -19,7 +19,7 @@ def parse_code(tokens: List): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = MojoLexer(code) - return lexer.tokens + return lexer.tokenize() def test_mod_parsing(): diff --git a/tests/test_backend/test_opengl/test_codegen.py b/tests/test_backend/test_opengl/test_codegen.py index bbb83b31..760bfe00 100644 --- a/tests/test_backend/test_opengl/test_codegen.py +++ b/tests/test_backend/test_opengl/test_codegen.py @@ -16,7 +16,7 @@ def tokenize_code(code: str) -> List: """ lexer = GLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(Tokens: List, shader_type="vertex") -> List: diff --git a/tests/test_backend/test_opengl/test_lexer.py b/tests/test_backend/test_opengl/test_lexer.py index a1129290..5226520b 100644 --- a/tests/test_backend/test_opengl/test_lexer.py +++ 
b/tests/test_backend/test_opengl/test_lexer.py @@ -14,7 +14,7 @@ def tokenize_code(code: str) -> List: """ lexer = GLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def test_input_output_tokenization(): diff --git a/tests/test_backend/test_opengl/test_parser.py b/tests/test_backend/test_opengl/test_parser.py index 79a107b5..59514061 100644 --- a/tests/test_backend/test_opengl/test_parser.py +++ b/tests/test_backend/test_opengl/test_parser.py @@ -15,7 +15,7 @@ def tokenize_code(code: str) -> List: """ lexer = GLSLLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(Tokens: List, shader_type="vertex") -> List: diff --git a/tests/test_backend/test_slang/test_codegen.py b/tests/test_backend/test_slang/test_codegen.py index 6a00430f..cb5387fe 100644 --- a/tests/test_backend/test_slang/test_codegen.py +++ b/tests/test_backend/test_slang/test_codegen.py @@ -19,7 +19,7 @@ def generate_code(ast_node): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = SlangLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_slang/test_lexer.py b/tests/test_backend/test_slang/test_lexer.py index a9bbd989..1ef8bd23 100644 --- a/tests/test_backend/test_slang/test_lexer.py +++ b/tests/test_backend/test_slang/test_lexer.py @@ -6,7 +6,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = SlangLexer(code) - return lexer.tokens + return lexer.tokenize() def test_struct_tokenization(): diff --git a/tests/test_backend/test_slang/test_parser.py b/tests/test_backend/test_slang/test_parser.py index 69fd232d..742f597c 100644 --- a/tests/test_backend/test_slang/test_parser.py +++ b/tests/test_backend/test_slang/test_parser.py @@ -19,7 +19,7 @@ def parse_code(tokens: List): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = SlangLexer(code) - return lexer.tokens + return lexer.tokenize() def test_struct_parsing(): diff --git a/tests/test_backend/test_vulkan/test_codegen.py b/tests/test_backend/test_vulkan/test_codegen.py index 6e2c91c4..71a274d8 100644 --- a/tests/test_backend/test_vulkan/test_codegen.py +++ b/tests/test_backend/test_vulkan/test_codegen.py @@ -16,7 +16,7 @@ def generate_code(ast_node): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = VulkanLexer(code) - return lexer.tokens + return lexer.tokenize() def parse_code(tokens: List): diff --git a/tests/test_backend/test_vulkan/test_lexer.py b/tests/test_backend/test_vulkan/test_lexer.py index 9fabf09a..e68db359 100644 --- a/tests/test_backend/test_vulkan/test_lexer.py +++ b/tests/test_backend/test_vulkan/test_lexer.py @@ -6,7 +6,7 @@ def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = VulkanLexer(code) - return lexer.tokens + return lexer.tokenize() def test_mod_tokenization(): diff --git a/tests/test_backend/test_vulkan/test_parser.py b/tests/test_backend/test_vulkan/test_parser.py index d1af630c..ec7789aa 100644 --- a/tests/test_backend/test_vulkan/test_parser.py +++ b/tests/test_backend/test_vulkan/test_parser.py @@ -19,7 +19,7 @@ def parse_code(tokens: List): def tokenize_code(code: str) -> List: """Helper function to tokenize code.""" lexer = VulkanLexer(code) - return lexer.tokens + return lexer.tokenize() def test_mod_parsing(): From 0e42f94c3688849a5f47f4df47952b29071e5769 Mon Sep 17 00:00:00 2001 From: Vruddhi Shah Date: Fri, 3 Jan 2025 23:47:18 +0530 Subject: [PATCH 07/13] 
=?UTF-8?q?Refactor=20code=20generation=20logic=20fo?= =?UTF-8?q?r=20improved=20readability=20and=20maintaina=E2=80=A6=20(#252)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Refactor code generation logic for improved readability and maintainability Replaced repeated shader type checks in `generate()` with a dictionary-based approach, improving clarity and reducing redundancy. - Simplified the `generate_if()` method to handle `else_if_conditions` and `else_if_bodies` more clearly, enhancing readability. - Refactored `generate_statement()` by using a dictionary-based mapping of node types to their corresponding handlers, reducing conditional branching and improving code organization. These changes streamline the code generation logic, making the codebase easier to understand and extend in the future. * Refactor code generation logic for improved readability and maintainability - Replaced repeated shader type checks in `generate()` with a dictionary-based approach, improving clarity and reducing redundancy. - Simplified the `generate_if()` method to handle `else_if_conditions` and `else_if_bodies` more clearly, enhancing readability. - Refactored `generate_statement()` by using a dictionary-based mapping of node types to their corresponding handlers, reducing conditional branching and improving code organization. These changes streamline the code generation logic, making the codebase easier to understand and extend in the future. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed Imports and the errors. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update directx_codegen.py * Update directx_codegen.py * Update directx_codegen.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update directx_codegen.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Nripesh Niketan --- crosstl/translator/codegen/directx_codegen.py | 83 ++++++++----------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/crosstl/translator/codegen/directx_codegen.py b/crosstl/translator/codegen/directx_codegen.py index 7780d31a..7cc517f0 100644 --- a/crosstl/translator/codegen/directx_codegen.py +++ b/crosstl/translator/codegen/directx_codegen.py @@ -43,19 +43,16 @@ def __init__(self): } self.semantic_map = { - # Vertex inputs instance "gl_VertexID": "SV_VertexID", "gl_InstanceID": "SV_InstanceID", "gl_IsFrontFace": "FRONT_FACE", "gl_PrimitiveID": "PRIMITIVE_ID", "InstanceID": "INSTANCE_ID", "VertexID": "VERTEX_ID", - # Vertex outputs "gl_Position": "SV_POSITION", "gl_PointSize": "SV_POINTSIZE", "gl_ClipDistance": "SV_ClipDistance", "gl_CullDistance": "SV_CullDistance", - # Fragment inputs "gl_FragColor": "SV_TARGET", "gl_FragColor0": "SV_TARGET0", "gl_FragColor1": "SV_TARGET1", @@ -66,10 +63,6 @@ def __init__(self): "gl_FragColor6": "SV_TARGET6", "gl_FragColor7": "SV_TARGET7", "gl_FragDepth": "SV_DEPTH", - "gl_FragDepth0": "SV_DEPTH0", - "gl_FragDepth1": "SV_DEPTH1", - "gl_FragDepth2": "SV_DEPTH2", - "gl_FragDepth3": "SV_DEPTH3", } def generate(self, ast): @@ -83,7 +76,6 @@ def generate(self, ast): code += "}\n" # Generate global variables - for i, node in enumerate(ast.global_variables): if node.vtype in ["sampler2D", "samplerCube"]: code += "// Texture Samplers\n" @@ -93,6 +85,7 @@ def generate(self, ast): code 
+= f"{self.map_type(node.vtype)} {node.name} :register(s{i});\n" else: code += f"{self.map_type(node.vtype)} {node.name};\n" + # Generate cbuffers if ast.cbuffers: code += "// Constant Buffers\n" @@ -106,7 +99,6 @@ def generate(self, ast): elif func.qualifier == "fragment": code += "// Fragment Shader\n" code += self.generate_function(func, shader_type="fragment") - elif func.qualifier == "compute": code += "// Compute Shader\n" code += self.generate_function(func, shader_type="compute") @@ -132,38 +124,32 @@ def generate_function(self, func, indent=0, shader_type=None): f"{self.map_type(p.vtype)} {p.name} {self.map_semantic(p.semantic)}" for p in func.params ) - if shader_type == "vertex": - code += f"{self.map_type(func.return_type)} VSMain({params}) {self.map_semantic(func.semantic)} {{\n" - elif shader_type == "fragment": - code += f"{self.map_type(func.return_type)} PSMain({params}) {self.map_semantic(func.semantic)} {{\n" - elif shader_type == "compute": - code += f"{self.map_type(func.return_type)} CSMain({params}) {self.map_semantic(func.semantic)} {{\n" + shader_map = {"vertex": "VSMain", "fragment": "PSMain", "compute": "CSMain"} + + if func.qualifier in shader_map: + code += f"// {func.qualifier.capitalize()} Shader\n" + code += f"{self.map_type(func.return_type)} {shader_map[func.qualifier]}({params}) {{\n" else: - code += f"{self.map_type(func.return_type)} {func.name}({params}) {self.map_semantic(func.semantic)} {{\n" + code += f"{self.map_type(func.return_type)} {func.name}({params}) {{\n" for stmt in func.body: - code += self.generate_statement(stmt, 1) - code += "}\n\n" - + code += self.generate_statement(stmt, indent + 1) + code += " " * indent + "}\n\n" return code def generate_statement(self, stmt, indent=0): indent_str = " " * indent - if isinstance(stmt, VariableNode): - return f"{indent_str}{self.map_type(stmt.vtype)} {stmt.name};\n" - elif isinstance(stmt, AssignmentNode): - return f"{indent_str}{self.generate_assignment(stmt)};\n" - elif isinstance(stmt, IfNode): - return self.generate_if(stmt, indent) - elif isinstance(stmt, ForNode): - return self.generate_for(stmt, indent) - elif isinstance(stmt, ReturnNode): - code = "" - for i, return_stmt in enumerate(stmt.value): - code += f"{self.generate_expression(return_stmt)}" - if i < len(stmt.value) - 1: - code += ", " - return f"{indent_str}return {code};\n" + statement_handlers = { + VariableNode: lambda stmt: f"{indent_str}{self.map_type(stmt.vtype)} {stmt.name};\n", + AssignmentNode: lambda stmt: f"{indent_str}{self.generate_assignment(stmt)};\n", + IfNode: lambda stmt: self.generate_if(stmt, indent), + ForNode: lambda stmt: self.generate_for(stmt, indent), + ReturnNode: lambda stmt: self.generate_return(stmt, indent), + } + + handler = statement_handlers.get(type(stmt)) + if handler: + return handler(stmt) else: return f"{indent_str}{self.generate_expression(stmt)};\n" @@ -199,15 +185,10 @@ def generate_if(self, node, indent): def generate_for(self, node, indent): indent_str = " " * indent - init = self.generate_statement(node.init, 0).strip()[ - :-1 - ] # Remove trailing semicolon - - condition = self.generate_statement(node.condition, 0).strip()[ - :-1 - ] # Remove trailing semicolon - - update = self.generate_statement(node.update, 0).strip()[:-1] + # Extract and remove the trailing semicolon from init, condition, and update expressions + init = self.generate_statement(node.init, 0).strip().rstrip(";") + condition = self.generate_statement(node.condition, 0).strip().rstrip(";") + update = 
self.generate_statement(node.update, 0).strip().rstrip(";") code = f"{indent_str}for ({init}; {condition}; {update}) {{\n" for stmt in node.body: @@ -215,6 +196,15 @@ def generate_for(self, node, indent): code += f"{indent_str}}}\n" return code + def generate_return(self, node, indent): + indent_str = " " * indent + code = "" + for i, return_stmt in enumerate(node.value): + code += f"{self.generate_expression(return_stmt)}" + if i < len(node.value) - 1: + code += ", " + return f"{indent_str}return {code};\n" + def generate_expression(self, expr): if isinstance(expr, str): return expr @@ -225,12 +215,10 @@ def generate_expression(self, expr): left = self.generate_expression(expr.left) right = self.generate_expression(expr.right) return f"{left} {self.map_operator(expr.op)} {right}" - elif isinstance(expr, AssignmentNode): left = self.generate_expression(expr.left) right = self.generate_expression(expr.right) return f"{left} {self.map_operator(expr.operator)} {right}" - elif isinstance(expr, UnaryOpNode): operand = self.generate_expression(expr.operand) return f"{self.map_operator(expr.op)}{operand}" @@ -240,7 +228,6 @@ def generate_expression(self, expr): elif isinstance(expr, MemberAccessNode): obj = self.generate_expression(expr.object) return f"{obj}.{expr.member}" - elif isinstance(expr, TernaryOpNode): return f"{self.generate_expression(expr.condition)} ? {self.generate_expression(expr.true_expr)} : {self.generate_expression(expr.false_expr)}" else: @@ -285,7 +272,7 @@ def map_operator(self, op): return op_map.get(op, op) def map_semantic(self, semantic): - if semantic is not None: + if semantic: return f": {self.semantic_map.get(semantic, semantic)}" else: - return "" + return "" # Handle None by returning an empty string From 5c8db22745511d804bb23d09b32c87f91d58e088 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:22:41 +0000 Subject: [PATCH 08/13] Fix: improve PR merge workflow by enhancing checkout and git configuration steps --- .github/workflows/PR_merge_main.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index 1be76236..a48e3975 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -6,25 +6,27 @@ jobs: update-pr: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Fetch PR branch and main run: | # Fetch the PR branch and the main branch git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-branch git fetch origin main + git checkout -b pr-branch FETCH_HEAD - name: Set git user run: | - git config --global user.email "${{secrets.OWNER_EMAIL}}" - git config --global user.name "Nripesh Niketan" + git config --global user.email "${{ secrets.OWNER_EMAIL }}" + git config --global user.name "GitHub Action Bot" - name: Merge main into PR branch run: | - # Checkout the PR branch - git checkout pr-branch # Merge the main branch into the PR branch - git merge origin/main + git merge origin/main || exit 1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 830485a50df9bb1fc6082cfa97b9b3439a043ad5 Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:27:14 +0000 Subject: [PATCH 09/13] Fix: enhance PR merge workflow by ensuring branch cleanup before fetching --- .github/workflows/PR_merge_main.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index a48e3975..8affaff0 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -13,10 +13,16 @@ jobs: - name: Fetch PR branch and main run: | - # Fetch the PR branch and the main branch + # Check if the branch already exists and delete it if necessary + if git rev-parse --verify pr-branch; then + git branch -D pr-branch + fi + # Fetch the PR branch and create a local branch git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-branch - git fetch origin main git checkout -b pr-branch FETCH_HEAD + # Fetch the main branch + git fetch origin main + - name: Set git user run: | From 7c247a07e348a31a0bd015306da2b014fb2d0ccf Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:31:29 +0000 Subject: [PATCH 10/13] Fix: improve PR merge workflow by refining branch checkout and fetch steps --- .github/workflows/PR_merge_main.yml | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index 8affaff0..73d63e6e 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -6,23 +6,25 @@ jobs: update-pr: runs-on: ubuntu-latest steps: - - name: Checkout PR branch + - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Fetch PR branch and main + - name: Remove old pr-branch (if any) run: | - # Check if the branch already exists and delete it if necessary - if git rev-parse --verify pr-branch; then + if git rev-parse --verify pr-branch 2>/dev/null; then git branch -D pr-branch fi - # Fetch the PR branch and create a local branch - git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-branch + + - name: Fetch PR branch into FETCH_HEAD + run: | + git fetch origin pull/${{ github.event.pull_request.number }}/head git checkout -b pr-branch FETCH_HEAD - # Fetch the main branch + + - name: Fetch main + run: | git fetch origin main - - name: Set git user run: | @@ -31,12 +33,12 @@ jobs: - name: Merge main into PR branch run: | - # Merge the main branch into the PR branch - git merge origin/main || exit 1 + git merge origin/main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Push changes + - name: Push changes back to the PR run: | - # Push the updated PR branch back to the forked repository - git push https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} + git push \ + "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}" \ + pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} From 01218635503fa2e1d36bf1fe2149b0a9438443dc Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:35:47 +0000 Subject: [PATCH 11/13] Fix: update GitHub Actions workflow to use OWNER_TOKEN for authentication --- .github/workflows/PR_merge_main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index 73d63e6e..b63162ad 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -35,10 +35,10 @@ jobs: run: | git merge origin/main env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.OWNER_TOKEN }} - name: Push changes back to the PR run: | git push \ - "https://x-access-token:${{ 
secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}" \ + "https://x-access-token:${{ secrets.OWNER_TOKEN }}@github.com/${{ github.repository }}" \ pr-branch:refs/heads/${{ github.event.pull_request.head.ref }} From 294f57006d63534422152fb677cf1707a2c4114d Mon Sep 17 00:00:00 2001 From: Nripesh Niketan Date: Fri, 3 Jan 2025 18:42:32 +0000 Subject: [PATCH 12/13] Fix: update GitHub Actions workflow to set specific git user name --- .github/workflows/PR_merge_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml index b63162ad..3b12e434 100644 --- a/.github/workflows/PR_merge_main.yml +++ b/.github/workflows/PR_merge_main.yml @@ -29,7 +29,7 @@ jobs: - name: Set git user run: | git config --global user.email "${{ secrets.OWNER_EMAIL }}" - git config --global user.name "GitHub Action Bot" + git config --global user.name "Nripesh Niketan" - name: Merge main into PR branch run: | From a83549c0ab4085533ad0b3011b3ca1872b455bdd Mon Sep 17 00:00:00 2001 From: Nripesh Niketan <86844847+NripeshN@users.noreply.github.com> Date: Fri, 3 Jan 2025 18:46:11 +0000 Subject: [PATCH 13/13] Delete .github/workflows/PR_merge_main.yml --- .github/workflows/PR_merge_main.yml | 44 ----------------------------- 1 file changed, 44 deletions(-) delete mode 100644 .github/workflows/PR_merge_main.yml diff --git a/.github/workflows/PR_merge_main.yml b/.github/workflows/PR_merge_main.yml deleted file mode 100644 index 3b12e434..00000000 --- a/.github/workflows/PR_merge_main.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: PR Merge Main -on: - pull_request: - -jobs: - update-pr: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Remove old pr-branch (if any) - run: | - if git rev-parse --verify pr-branch 2>/dev/null; then - git branch -D pr-branch - fi - - - name: Fetch PR branch into FETCH_HEAD - run: | - git fetch origin pull/${{ github.event.pull_request.number }}/head - git checkout -b pr-branch FETCH_HEAD - - - name: Fetch main - run: | - git fetch origin main - - - name: Set git user - run: | - git config --global user.email "${{ secrets.OWNER_EMAIL }}" - git config --global user.name "Nripesh Niketan" - - - name: Merge main into PR branch - run: | - git merge origin/main - env: - GITHUB_TOKEN: ${{ secrets.OWNER_TOKEN }} - - - name: Push changes back to the PR - run: | - git push \ - "https://x-access-token:${{ secrets.OWNER_TOKEN }}@github.com/${{ github.repository }}" \ - pr-branch:refs/heads/${{ github.event.pull_request.head.ref }}
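The test-suite updates in the lexer patches above replace direct access to a prebuilt `lexer.tokens` list with calls to the new `tokenize()` method. A minimal sketch of exercising the refactored SlangLexer, assuming the import path follows the file location crosstl/backend/slang/SlangLexer.py (adjust if the package layout differs):

from crosstl.backend.slang.SlangLexer import SlangLexer

# tokenize() materialises the token generator into a list; WHITESPACE and
# comment tokens are skipped, and the stream ends with an ("EOF", "") token.
tokens = SlangLexer("a % b").tokenize()
# Expected: [("IDENTIFIER", "a"), ("MOD", "%"), ("IDENTIFIER", "b"), ("EOF", "")]
assert ("MOD", "%") in tokens

# token_generator() can also be consumed lazily, one token at a time.
for token_type, text in SlangLexer("a % b").token_generator():
    print(token_type, text)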
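The directx_codegen refactor above replaces if/elif chains in `generate_statement()` with a dictionary that maps node types to handler functions. A generic, self-contained sketch of that dispatch pattern (illustrative only; the node classes and handlers below are stand-ins, not the project's actual AST classes):

from dataclasses import dataclass

@dataclass
class VariableNode:
    vtype: str
    name: str

@dataclass
class ReturnNode:
    value: str

def gen_variable(stmt, indent):
    return " " * indent + f"{stmt.vtype} {stmt.name};\n"

def gen_return(stmt, indent):
    return " " * indent + f"return {stmt.value};\n"

# Map each node type to its handler instead of branching with if/elif.
STATEMENT_HANDLERS = {
    VariableNode: gen_variable,
    ReturnNode: gen_return,
}

def generate_statement(stmt, indent=0):
    handler = STATEMENT_HANDLERS.get(type(stmt))
    if handler:
        return handler(stmt, indent)
    # Fall back to a generic expression statement for unmapped node types.
    return " " * indent + f"{stmt};\n"

print(generate_statement(VariableNode("float", "x"), indent=4), end="")
print(generate_statement(ReturnNode("x"), indent=4), end="")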