Files
nand2tetris/solutions_only/project 11/tokenizer.py
T
QkoSad 7894b48931 .
2025-07-16 13:00:37 +03:00

79 lines
2.7 KiB
Python
Executable File

import re
class Tokenizer:
    """Split Jack source text into tokens, one per :meth:`advance` call.

    Typical use: call :meth:`clear_file` to load a file with its comments
    removed (or assign ``file`` directly), then call :meth:`advance`
    repeatedly and read ``token`` / :meth:`token_type` after each call.

    Attributes:
        i: Offset in ``file`` of the first character not yet consumed.
        file: Source text being tokenized (comments already removed).
        symbols: The single-character symbol tokens.
        key_word: The reserved words.
        token: The most recently read token. String constants keep their
            surrounding double quotes.
    """

    # Compiled once and matched at an explicit offset, so advance() never
    # has to walk the text one character at a time.
    _WHITESPACE = re.compile(r'\s*')
    _WORD = re.compile(r'\w+')
    _LEADING_DIGITS = re.compile(r'\d+')
    # String constants are part of this pattern only so that comment
    # markers inside them are skipped over instead of being stripped.
    _COMMENT_OR_STRING = re.compile(
        r'(?P<string>"[^"\n]*")'          # string constant: kept verbatim
        r'|//[^\n]*'                      # line comment, up to the newline
        r'|/\*.*?(?P<close>\*/|\Z)',      # /* ... */ or /** ... */ comment
        re.DOTALL,
    )

    def __init__(self):
        self.i = 0
        self.file = ''
        self.symbols = (
            '(', ')', '[', ']', '}', '{', '>', '<', '=', '*', '+', '-', '/',
            '.', ';', ',', '&', '|', '~',
        )
        self.key_word = (
            'class', 'method', 'function', 'constructor', 'int', 'boolean',
            'char', 'void', 'var', 'static', 'field', 'let', 'do', 'if',
            'else', 'while', 'return', 'true', 'false', 'null', 'this',
        )
        self.token = ''

    def token_type(self, token=None):
        """Classify a token.

        Args:
            token: Token text to classify; defaults to the current token.

        Returns:
            ``'keyword'``, ``'stringConstant'``, ``'integerConstant'``,
            ``'symbol'`` or ``'identifier'``; ``None`` when the token is
            ``None`` or empty (e.g. before the first :meth:`advance`).
        """
        if token is None:
            token = self.token
        if token is None or token == '':
            return None
        if token in self.key_word:
            return 'keyword'
        if token[0] == '"':
            return 'stringConstant'
        if self._LEADING_DIGITS.match(token):
            return 'integerConstant'
        if token in self.symbols:
            return 'symbol'
        return 'identifier'

    def advance(self):
        """Read the next token from ``file`` into ``token``.

        Leading whitespace is skipped. When only whitespace (or nothing)
        remains, ``token`` is left unchanged.

        Raises:
            ValueError: If a string constant has no closing quote, or the
                next character cannot start any token.
        """
        text = self.file
        start = self._WHITESPACE.match(text, self.i).end()
        if start >= len(text):
            self.i = start
            return

        first = text[start]
        if first in self.symbols:
            self.token = first
            self.i = start + 1
            return

        if first == '"':
            closing = text.find('"', start + 1)
            if closing == -1:
                raise ValueError(
                    f'unterminated string constant at offset {start}')
            self.token = text[start:closing + 1]
            self.i = closing + 1
            return

        word = self._WORD.match(text, start)
        if word is None:
            # Without this check the position would never move forward and
            # every later call would return the same empty token.
            raise ValueError(
                f'unexpected character {first!r} at offset {start}')
        self.token = word.group()
        # word.end() is one past the token even when the token is the last
        # thing in the text, so it is never read a second time.
        self.i = word.end()

    def clear_file(self, directory):
        """Load a source file into ``file`` with its comments removed.

        Args:
            directory: Path of the file to read (despite the name, this is
                passed straight to ``open``).

        Raises:
            ValueError: If a block comment is never closed.
        """
        with open(directory, "r") as my_file:
            txt = my_file.read()
        self.file = self._strip_comments(txt)

    @classmethod
    def _strip_comments(cls, txt):
        """Return ``txt`` without ``//``, ``/* */`` and ``/** */`` comments."""

        def replacement(match):
            literal = match.group('string')
            if literal is not None:
                return literal
            closer = match.group('close')
            if closer is None:
                # Line comment: the newline after it is not part of the
                # match, so the neighbouring tokens stay separated.
                return ''
            if closer == '':
                raise ValueError(
                    f'unterminated block comment at offset {match.start()}')
            # A block comment can sit between two tokens on one line; leave
            # a space behind so they do not fuse into a single word.
            return ' '

        return cls._COMMENT_OR_STRING.sub(replacement, txt)