Commit 8cf10df

Merge remote-tracking branch 'origin/main' into pr/245

NripeshN committed Jan 3, 2025
2 parents e197182 + a83549c
Showing 34 changed files with 940 additions and 553 deletions.
168 changes: 142 additions & 26 deletions crosstl/backend/DirectX/DirectxLexer.py
@@ -1,4 +1,110 @@
 import re
+from typing import Iterator, Tuple, List, Optional
+
+
+# using a set for fast membership checks on skipped token types
+SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"}
+
+# keywords that remap matched IDENTIFIER tokens to their own token types
+KEYWORDS = {
+    "struct": "STRUCT",
+    "cbuffer": "CBUFFER",
+    "Texture2D": "TEXTURE2D",
+    "SamplerState": "SAMPLER_STATE",
+    "float": "FLOAT",
+    "float2": "FVECTOR",
+    "float3": "FVECTOR",
+    "float4": "FVECTOR",
+    "double": "DOUBLE",
+    "int": "INT",
+    "uint": "UINT",
+    "bool": "BOOL",
+    "void": "VOID",
+    "return": "RETURN",
+    "if": "IF",
+    "else": "ELSE",
+    "for": "FOR",
+    "while": "WHILE",
+    "do": "DO",
+    "register": "REGISTER",
+    "switch": "SWITCH",
+    "case": "CASE",
+    "default": "DEFAULT",
+    "break": "BREAK",
+}
+
+# an immutable, ordered token table; the first matching pattern wins
+TOKENS = tuple(
+    [
+        ("COMMENT_SINGLE", r"//.*"),
+        ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"),
+        ("INCLUDE", r"\#include\b"),
+        ("STRUCT", r"\bstruct\b"),
+        ("CBUFFER", r"\bcbuffer\b"),
+        ("TEXTURE2D", r"\bTexture2D\b"),
+        ("SAMPLER_STATE", r"\bSamplerState\b"),
+        ("FVECTOR", r"\bfloat[2-4]\b"),
+        ("FLOAT", r"\bfloat\b"),
+        ("DOUBLE", r"\bdouble\b"),
+        ("INT", r"\bint\b"),
+        ("UINT", r"\buint\b"),
+        ("BOOL", r"\bbool\b"),
+        ("MATRIX", r"\bfloat[2-4]x[2-4]\b"),
+        ("VOID", r"\bvoid\b"),
+        ("RETURN", r"\breturn\b"),
+        ("IF", r"\bif\b"),
+        ("ELSE_IF", r"\belse\sif\b"),
+        ("ELSE", r"\belse\b"),
+        ("FOR", r"\bfor\b"),
+        ("WHILE", r"\bwhile\b"),
+        ("DO", r"\bdo\b"),
+        ("REGISTER", r"\bregister\b"),
+        ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"),
+        ("NUMBER", r"\d+(\.\d+)?"),
+        ("LBRACE", r"\{"),
+        ("RBRACE", r"\}"),
+        ("LPAREN", r"\("),
+        ("RPAREN", r"\)"),
+        ("LBRACKET", r"\["),
+        ("RBRACKET", r"\]"),
+        ("SEMICOLON", r";"),
+        ("COMMA", r","),
+        ("COLON", r":"),
+        ("QUESTION", r"\?"),
+        ("SHIFT_LEFT", r"<<"),
+        ("SHIFT_RIGHT", r">>"),
+        ("LESS_EQUAL", r"<="),
+        ("GREATER_EQUAL", r">="),
+        ("LESS_THAN", r"<"),
+        ("GREATER_THAN", r">"),
+        ("EQUAL", r"=="),
+        ("NOT_EQUAL", r"!="),
+        ("PLUS_EQUALS", r"\+="),
+        ("MINUS_EQUALS", r"-="),
+        ("MULTIPLY_EQUALS", r"\*="),
+        ("DIVIDE_EQUALS", r"/="),
+        ("ASSIGN_XOR", r"\^="),
+        ("ASSIGN_OR", r"\|="),
+        ("ASSIGN_AND", r"\&="),
+        ("BITWISE_XOR", r"\^"),
+        ("LOGICAL_AND", r"&&"),
+        ("LOGICAL_OR", r"\|\|"),
+        ("BITWISE_OR", r"\|"),
+        ("DOT", r"\."),
+        ("MULTIPLY", r"\*"),
+        ("DIVIDE", r"/"),
+        ("PLUS", r"\+"),
+        ("MINUS", r"-"),
+        ("EQUALS", r"="),
+        ("WHITESPACE", r"\s+"),
+        ("STRING", r"\"[^\"]*\""),
+        ("SWITCH", r"\bswitch\b"),
+        ("CASE", r"\bcase\b"),
+        ("DEFAULT", r"\bdefault\b"),
+        ("BREAK", r"\bbreak\b"),
+        ("MOD", r"%"),
+    ]
+)
 
 TOKENS = [
     ("COMMENT_SINGLE", r"//.*"),
@@ -100,36 +206,46 @@
 
 
 class HLSLLexer:
-    def __init__(self, code):
+    def __init__(self, code: str):
+        self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS]
         self.code = code
-        self.tokens = []
-        self.tokenize()
+        self._length = len(code)
 
-    def tokenize(self):
+    def tokenize(self) -> List[Tuple[str, str]]:
+        # tokenize the input code and return a list of tokens
+        return list(self.token_generator())
+
+    def token_generator(self) -> Iterator[Tuple[str, str]]:
+        # yield tokens one at a time
         pos = 0
-        while pos < len(self.code):
-            match = None
-
-            for token_type, pattern in TOKENS:
-                regex = re.compile(pattern)
-
-                match = regex.match(self.code, pos)
-                if match:
-                    text = match.group(0)
-                    if token_type == "IDENTIFIER" and text in KEYWORDS:
-                        token_type = KEYWORDS[text]
-                    if token_type not in [
-                        "WHITESPACE",
-                        "COMMENT_SINGLE",
-                        "COMMENT_MULTI",
-                    ]:
-                        token = (token_type, text)
-                        self.tokens.append(token)
-                    pos = match.end(0)
-                    break
-            if not match:
+        while pos < self._length:
+            token = self._next_token(pos)
+            if token is None:
                 raise SyntaxError(
                     f"Illegal character '{self.code[pos]}' at position {pos}"
                 )
+            new_pos, token_type, text = token
+
+            if token_type == "IDENTIFIER" and text in KEYWORDS:
+                token_type = KEYWORDS[text]
+
+            if token_type not in SKIP_TOKENS:
+                yield (token_type, text)
+
+            pos = new_pos
+
+        yield ("EOF", "")
+
+    def _next_token(self, pos: int) -> Optional[Tuple[int, str, str]]:
+        # find the next token starting at the given position, or None
+        for token_type, pattern in self._token_patterns:
+            match = pattern.match(self.code, pos)
+            if match:
+                return match.end(0), token_type, match.group(0)
+        return None
 
-        self.tokens.append(("EOF", ""))
+    @classmethod
+    def from_file(cls, filepath: str, chunk_size: int = 8192) -> "HLSLLexer":
+        # create a lexer from a file (chunk_size is currently unused; the
+        # file is read in full)
+        with open(filepath, "r") as f:
+            return cls(f.read())
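
For orientation, a minimal usage sketch of the refactored lexer (not part of the commit; it assumes the module is importable under its repository path, crosstl/backend/DirectX/DirectxLexer.py, and uses an illustrative HLSL snippet):

    from crosstl.backend.DirectX.DirectxLexer import HLSLLexer

    code = "float4 color; int count = 0;"
    lexer = HLSLLexer(code)

    # tokenize() materializes the full token list, ending with ("EOF", "").
    tokens = lexer.tokenize()

    # token_generator() yields lazily, so a large shader can be scanned
    # without building the whole list up front.
    for token_type, text in lexer.token_generator():
        print(token_type, text)

Because __init__ no longer calls tokenize(), constructing the lexer is cheap; tokens are only produced when one of the two methods is called.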
2 changes: 1 addition & 1 deletion crosstl/backend/DirectX/DirectxParser.py
@@ -501,7 +501,7 @@ def parse_additive(self):
 
     def parse_multiplicative(self):
        left = self.parse_unary()
-        while self.current_token[0] in ["MULTIPLY", "DIVIDE"]:
+        while self.current_token[0] in ["MULTIPLY", "DIVIDE", "MOD"]:
            op = self.current_token[1]
            self.eat(self.current_token[0])
            right = self.parse_unary()
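
With "MOD" accepted alongside "MULTIPLY" and "DIVIDE", the % operator now binds at the same precedence as * and /. A sketch of the token stream that feeds this rule (illustrative input, not from the commit):

    lexer = HLSLLexer("row % 4")
    print(lexer.tokenize())
    # [('IDENTIFIER', 'row'), ('MOD', '%'), ('NUMBER', '4'), ('EOF', '')]

parse_multiplicative then consumes the MOD token like any other multiplicative operator, pairing the left and right operands into a binary expression node.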
