This commit is contained in:
QkoSad
2025-07-16 13:00:37 +03:00
commit 7894b48931
806 changed files with 162532 additions and 0 deletions
+350
View File
@@ -0,0 +1,350 @@
import os
from pathlib import Path
from tokenizer import Tokenizer
from vmwriter import VMWriter
from symbol_table import SymbolTable
# TODO: pass arguments by keyword at call sites so their meaning is visible, e.g. self.vmwriter.write_push(segment='constant', index=0)
class CompilationEngine:
    """Recursive-descent compiler that translates one Jack class to VM code.

    Tokens are read from ``tokenizer`` and VM commands are emitted through a
    ``VMWriter`` opened on ``full_path_vm``.  ``sym_table`` is a stack of
    ``SymbolTable`` objects: the class-level table at the bottom and, while a
    subroutine is being compiled, that subroutine's table on top.
    """

    # Binary operators accepted between the terms of an expression.
    _BINARY_OPS = ('+', '-', '*', '/', '|', '=', '>', '<', '&')

    def __init__(self, tokenizer, full_path_vm):
        """Bind the engine to a token source and open the output file.

        Args:
            tokenizer: object exposing ``token``, ``advance()`` and
                ``token_type()``.
            full_path_vm: path of the ``.vm`` file to create.
        """
        self.string = self.sub_type = self.class_name = self.function_type = ''
        self.tab = self.recursion_index = 0
        self.tokenizer = tokenizer
        self.sym_table = []
        self.vmwriter = VMWriter(full_path_vm)
        # Retained so existing code that inspects the attribute keeps working;
        # the compile methods now hold pending names/operators in locals.
        self.current_vm = []

    # ------------------------------------------------------------------
    # Symbol lookup
    # ------------------------------------------------------------------
    def search_kind_of_sym(self, current_vm):
        """Return ``(segment, index)`` for the name ``current_vm``, or None.

        The innermost table is consulted first; outer tables only contribute
        ``static`` and ``this`` (field) entries.
        """
        innermost = self.sym_table[-1]
        kind = innermost.kind_of(current_vm)
        if kind is not None:
            return kind, innermost.index_of(current_vm)
        for table in reversed(self.sym_table[:-1]):
            kind = table.kind_of(current_vm)
            if kind in ('static', 'this'):
                return kind, table.index_of(current_vm)
        return None

    def search_type_of_sym(self, current_vm):
        """Return the declared type of ``current_vm``, innermost scope first."""
        for table in reversed(self.sym_table):
            found = table.type_of(current_vm)
            if found is not None:
                return found
        return None

    def _lookup_variable(self, name):
        """Return ``(segment, index)`` for ``name`` or raise ``ValueError``."""
        location = self.search_kind_of_sym(name)
        if location is None:
            raise ValueError(
                f'undefined variable {name!r} in class {self.class_name!r}')
        return location

    def write_token(self):
        """Append the current token to ``self.string`` as an XML-style line."""
        kind = self.tokenizer.token_type()
        text = self.tokenizer.token
        if kind == 'stringConstant':
            text = text.strip('"')
        self.string += ' ' * self.tab + '<' + kind + '> ' + text + ' </' + kind + '>\n'

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """Compile a whole class and close the output file.

        Expects a tokenizer on which ``advance()`` has not been called yet:
        the first ``advance()`` here loads the ``class`` keyword.

        Raises:
            ValueError: if the class body contains a token that starts
                neither a variable declaration nor a subroutine.
        """
        tok = self.tokenizer
        self.sym_table.append(SymbolTable())
        try:
            tok.advance()  # -> 'class'
            tok.advance()  # -> class name
            self.class_name = tok.token
            tok.advance()  # -> '{'
            tok.advance()  # -> first member or '}'
            while tok.token != '}':
                if tok.token in ('static', 'field'):
                    self.compile_class_var_dec()
                elif tok.token in ('constructor', 'function', 'method'):
                    self.compile_subroutine()
                else:
                    # Without this the loop would never make progress.
                    raise ValueError(
                        f'unexpected token {tok.token!r} in body of class '
                        f'{self.class_name!r}')
            tok.advance()  # '}' ->
            self.sym_table.pop()
        finally:
            # Close the file even when compilation fails part-way.
            self.vmwriter.close_vm_file()

    def _compile_declaration(self):
        """Define ``kind type name (',' name)* ';'`` in the innermost table.

        Starts on the kind keyword and leaves the tokenizer on the token
        after the terminating ``;``.
        """
        tok = self.tokenizer
        table = self.sym_table[-1]
        var_kind = tok.token
        tok.advance()  # kind ->
        var_type = tok.token
        tok.advance()  # type ->
        table.define(tok.token, var_type, var_kind)
        tok.advance()  # name ->
        while tok.token == ',':
            tok.advance()  # , ->
            table.define(tok.token, var_type, var_kind)
            tok.advance()  # name ->
        tok.advance()  # ; ->

    def compile_class_var_dec(self):
        """Record a ``static``/``field`` declaration in the class table."""
        self._compile_declaration()

    def compile_subroutine(self):
        """Compile one constructor, function or method.

        Starts on the subroutine keyword and leaves the tokenizer on the
        token after the closing ``}``.

        Raises:
            ValueError: if the body contains a token that does not start a
                statement.
        """
        tok = self.tokenizer
        writer = self.vmwriter
        self.sym_table.append(SymbolTable())
        self.sub_type = tok.token
        tok.advance()  # constructor|function|method ->
        self.function_type = tok.token
        tok.advance()  # return type ->
        sub_name = tok.token
        tok.advance()  # subroutine name ->
        tok.advance()  # ( ->
        if self.sub_type == 'method':
            # Reserve argument 0 for the receiver before real parameters.
            self.sym_table[-1].start_subroutine('this', self.class_name)
        self.compile_parameter_list()
        tok.advance()  # { ->
        while tok.token == 'var':  # declarations only fill the symbol table
            self.compile_var_dec()

        writer.write_function(f'{self.class_name}.{sub_name}',
                              self.sym_table[-1].var_count('var'))
        if self.sub_type == 'constructor':
            # Allocate one word per field and anchor `this` to the block.
            writer.write_push('constant', self.sym_table[-2].var_count('field'))
            writer.write_call('Memory.alloc', 1)
            writer.write_pop('pointer', 0)
        elif self.sub_type == 'method':
            # Anchor `this` to the receiver passed as argument 0.
            writer.write_push('argument', 0)
            writer.write_pop('pointer', 0)

        self.compile_statements()
        if tok.token != '}':
            # compile_statements stops at the first non-statement token;
            # retrying would loop forever, so report it instead.
            raise ValueError(
                f'unexpected token {tok.token!r} in '
                f'{self.class_name}.{sub_name}')
        tok.advance()  # } ->
        self.sym_table.pop()

    def compile_parameter_list(self):
        """Define the parameters as ``argument`` symbols.

        Starts on the token after ``(`` and consumes the closing ``)``.
        """
        tok = self.tokenizer
        table = self.sym_table[-1]
        if tok.token != ')':
            var_type = tok.token
            tok.advance()  # type ->
            table.define(tok.token, var_type, 'argument')
            tok.advance()  # name ->
            while tok.token == ',':
                tok.advance()  # , ->
                var_type = tok.token
                tok.advance()  # type ->
                table.define(tok.token, var_type, 'argument')
                tok.advance()  # name ->
        tok.advance()  # ) ->

    def compile_var_dec(self):
        """Record a ``var`` declaration in the subroutine table."""
        self._compile_declaration()

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------
    def compile_statements(self):
        """Compile consecutive statements; stop at the first other token."""
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while True:
            handler = handlers.get(self.tokenizer.token)
            if handler is None:
                break
            handler()

    def _compile_call(self, name):
        """Compile a subroutine call whose first identifier is ``name``.

        Expects the tokenizer on the ``(`` or ``.`` that follows ``name`` and
        leaves it on the token after the closing ``)``.  Does nothing when
        the current token is neither.
        """
        tok = self.tokenizer
        writer = self.vmwriter
        if tok.token == '(':
            # Bare `name(...)` is a method call on the current object, so the
            # receiver is pushed first and counted as an extra argument.
            writer.write_push('pointer', 0)
            tok.advance()  # ( ->
            n_args = self.compile_expression_list() + 1
            callee = f'{self.class_name}.{name}'
        elif tok.token == '.':
            tok.advance()  # . ->
            sub_name = tok.token
            tok.advance()  # subroutine name ->
            receiver = self.search_kind_of_sym(name)
            if receiver is not None:
                # `name` is a variable: call the method on its declared type.
                writer.write_push(*receiver)
            tok.advance()  # ( ->
            n_args = self.compile_expression_list()
            if receiver is not None:
                n_args += 1
                callee = f'{self.search_type_of_sym(name)}.{sub_name}'
            else:
                # `name` is not a known variable: treat it as a class name.
                callee = f'{name}.{sub_name}'
        else:
            return
        tok.advance()  # ) ->
        writer.write_call(callee, n_args)

    def compile_do(self):
        """Compile ``do call;`` and discard the returned value."""
        tok = self.tokenizer
        tok.advance()  # do ->
        name = tok.token
        tok.advance()  # first identifier ->
        self._compile_call(name)
        self.vmwriter.write_pop('temp', '0')
        tok.advance()  # ; ->

    def compile_let(self):
        """Compile ``let name = expr;`` or ``let name[expr] = expr;``.

        Raises:
            ValueError: if ``name`` is not a defined variable.
        """
        tok = self.tokenizer
        writer = self.vmwriter
        tok.advance()  # let ->
        target = tok.token
        tok.advance()  # variable name ->
        is_array = tok.token == '['
        if is_array:
            writer.write_push(*self._lookup_variable(target))
            tok.advance()  # [ ->
            self.compile_expression()
            tok.advance()  # ] ->
        tok.advance()  # = ->
        self.compile_expression()
        tok.advance()  # ; ->
        if is_array:
            # Stack holds base, index, value: park the value, form the
            # address, then store through `that`.
            writer.write_pop('temp', 1)
            writer.write_arithmetic('+')
            writer.write_pop('pointer', 1)
            writer.write_push('temp', 1)
            writer.write_pop('that', 0)
        else:
            writer.write_pop(*self._lookup_variable(target))

    def compile_while(self):
        """Compile ``while (expr) { statements }``."""
        tok = self.tokenizer
        writer = self.vmwriter
        tok.advance()  # while ->
        loop_label = writer.label_index
        writer.write_lable(loop_label)
        writer.label_index += 1
        tok.advance()  # ( ->
        self.compile_expression()
        tok.advance()  # ) ->
        exit_label = writer.label_index
        writer.write_if(exit_label)
        writer.label_index += 1
        tok.advance()  # { ->
        self.compile_statements()
        tok.advance()  # } ->
        writer.write_goto(loop_label)
        writer.write_lable(exit_label)

    def compile_return(self):
        """Compile ``return;`` or ``return expr;``."""
        tok = self.tokenizer
        tok.advance()  # return ->
        if tok.token != ';':
            self.compile_expression()
        self.vmwriter.write_return(self.function_type)
        tok.advance()  # ; ->

    def compile_if(self):
        """Compile ``if (expr) { ... }`` with an optional ``else { ... }``."""
        tok = self.tokenizer
        writer = self.vmwriter
        tok.advance()  # if ->
        tok.advance()  # ( ->
        self.compile_expression()
        tok.advance()  # ) ->
        else_label = writer.label_index
        writer.write_if(else_label)
        writer.label_index += 1
        tok.advance()  # { ->
        self.compile_statements()
        tok.advance()  # } ->
        # Allocated only after the body so nested statements number first.
        end_label = writer.label_index
        writer.write_goto(end_label)
        writer.label_index += 1
        writer.write_lable(else_label)
        if tok.token == 'else':
            tok.advance()  # else ->
            tok.advance()  # { ->
            self.compile_statements()
            tok.advance()  # } ->
        writer.write_lable(end_label)

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    def compile_expression(self):
        """Compile ``term (op term)*``, emitting each operator after its operands."""
        tok = self.tokenizer
        self.compile_term()
        while tok.token in self._BINARY_OPS:
            operator = tok.token
            tok.advance()  # operator ->
            self.compile_term()
            self.vmwriter.write_arithmetic(operator)

    def compile_term(self):
        """Compile a single term and leave the tokenizer just after it.

        A symbol other than ``(``, ``~`` or ``-`` is not a term and is left
        untouched.

        Raises:
            ValueError: if the term names an undefined variable.
        """
        tok = self.tokenizer
        writer = self.vmwriter
        token = tok.token
        if token == '(':  # parenthesised expression
            tok.advance()  # ( ->
            self.compile_expression()
            tok.advance()  # ) ->
            return
        if token in ('~', '-'):  # unary operator
            command = 'neg' if token == '-' else token
            tok.advance()  # ~ or - ->
            self.compile_term()
            writer.write_arithmetic(command)
            return
        if tok.token_type() == 'symbol':
            return

        tok.advance()  # constant, keyword or identifier ->
        following = tok.token
        if following == '[':  # array element
            tok.advance()  # [ ->
            writer.write_push(*self._lookup_variable(token))
            self.compile_expression()
            writer.write_arithmetic('+')
            writer.write_pop('pointer', 1)
            writer.write_push('that', 0)
            tok.advance()  # ] ->
        elif following in ('(', '.'):  # subroutine call
            self._compile_call(token)
        else:
            self._compile_simple_term(token)

    def _compile_simple_term(self, token):
        """Push the value of a constant, keyword constant or plain variable."""
        writer = self.vmwriter
        kind = self.tokenizer.token_type(token)
        if kind == 'stringConstant':
            text = token.strip('"')
            writer.write_push('constant', len(text))
            writer.write_call('String.new', 1)
            for char in text:
                writer.write_push('constant', ord(char))
                writer.write_call('String.appendChar', 2)
        elif kind == 'integerConstant':
            writer.write_push('constant', token)
        elif kind == 'identifier':
            writer.write_push(*self._lookup_variable(token))
        elif token == 'true':
            # true is emitted as -1 (constant 1 negated).
            writer.write_push('constant', '1')
            writer.write_arithmetic('neg')
        elif token in ('false', 'null'):
            writer.write_push('constant', '0')
        elif token == 'this':
            writer.write_push('pointer', '0')
        elif token == 'that':
            writer.write_push('pointer', '1')

    def compile_expression_list(self):
        """Compile comma-separated expressions and return how many there were.

        Starts on the token after ``(`` and stops on the closing ``)``
        without consuming it.
        """
        tok = self.tokenizer
        count_exp = 0
        if tok.token in ('(', '~', '-') or tok.token_type() != 'symbol':
            count_exp += 1
            self.compile_expression()
            while tok.token == ',':
                count_exp += 1
                tok.advance()  # , ->
                self.compile_expression()
        return count_exp
if __name__ == '__main__':
    # Compile every .jack file at or below the current working directory,
    # writing each .vm file next to its source.
    path = os.getcwd()
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            source = Path(root, name)
            # Compare the real extension: a plain "ends with 'jack'" test
            # would also pick up files such as "blackjack".
            if source.suffix != '.jack':
                continue
            tokenizer_main = Tokenizer()
            tokenizer_main.clear_file(source)
            full_path = source.with_suffix('.vm')
            comp_eng_main = CompilationEngine(tokenizer_main, full_path)
            comp_eng_main.compile_class()
+62
View File
@@ -0,0 +1,62 @@
class Symbol:
    """One symbol-table record; every field starts out blank or zero."""

    def __init__(self):
        # Name, kind and type are all empty strings until a table fills them.
        self.s_name = self.s_kind = self.s_type = ''
        self.s_index = 0
class SymbolTable:
    """Ordered list of ``Symbol`` records for a single scope."""

    def __init__(self):
        self.sym = []

    def _append(self, s_name, s_type, s_kind, s_index):
        """Create a record, fill in all four fields and store it."""
        entry = Symbol()
        entry.s_name = s_name
        entry.s_type = s_type
        entry.s_kind = s_kind
        entry.s_index = s_index
        self.sym.append(entry)

    def _first(self, s_name):
        """Return the earliest record named ``s_name``, or None."""
        return next((entry for entry in self.sym if entry.s_name == s_name), None)

    def define(self, s_name, s_type, s_kind):
        """Add a symbol whose index is the number of earlier same-kind symbols."""
        self._append(s_name, s_type, s_kind, self.var_count(s_kind))

    def start_subroutine(self, s_name, s_type):
        """Add ``s_name`` as an ``argument`` symbol with index 0."""
        self._append(s_name, s_type, 'argument', 0)

    def start_class(self, s_name, s_type):
        """Add a ``field`` symbol named ``this`` with index 0.

        ``s_name`` is accepted but not used.
        """
        self._append('this', s_type, 'field', 0)

    def var_count(self, s_kind):
        """Return how many stored symbols have kind ``s_kind``."""
        return sum(1 for entry in self.sym if entry.s_kind == s_kind)

    def kind_of(self, s_name):
        """Return the segment name for ``s_name``, or None if unknown.

        ``var`` is reported as ``local`` and ``field`` as ``this``; any other
        kind is returned unchanged.
        """
        entry = self._first(s_name)
        if entry is None:
            return None
        if entry.s_kind == 'var':
            return 'local'
        if entry.s_kind == 'field':
            return 'this'
        return entry.s_kind

    def type_of(self, s_name):
        """Return the declared type of ``s_name``, or None if unknown."""
        entry = self._first(s_name)
        return None if entry is None else entry.s_type

    def index_of(self, s_name):
        """Return the stored index of ``s_name``, or None if unknown."""
        entry = self._first(s_name)
        return None if entry is None else entry.s_index
+78
View File
@@ -0,0 +1,78 @@
import re
class Tokenizer:
    """Splits Jack source text into tokens, one per ``advance()`` call.

    ``file`` holds the comment-free source, ``i`` the scan position and
    ``token`` the most recently read token (``''`` before the first read and
    once the input is exhausted).
    """

    # A string constant, a ``//`` line comment or a ``/* ... */`` block
    # comment (``/** ... */`` included).  Strings are matched as well so that
    # comment markers inside them are left alone.
    _STRING_OR_COMMENT = re.compile(r'"[^"\n]*"|//[^\n]*|/\*.*?\*/', re.DOTALL)
    _WHITESPACE = re.compile(r'\s*')
    _WORD = re.compile(r'\w+')

    def __init__(self):
        self.i = 0
        self.file = ''
        self.symbols = ('(', ')', '[', ']', '}', '{', '>', '<', '=', '*', '+', '-', '/', '.', ';', ',', '&', '|',
                        '~')
        self.key_word = (
            'class', 'method', 'function', 'constructor', 'int', 'boolean', 'char', 'void', 'var', 'static', 'field',
            'let', 'do', 'if', 'else', 'while', 'return', 'true', 'false', 'null', 'this')
        self.token = ''

    def token_type(self, token=None):
        """Classify ``token`` (default: the current token).

        Returns:
            ``'keyword'``, ``'stringConstant'``, ``'integerConstant'``,
            ``'symbol'`` or ``'identifier'``; None for an empty token.
        """
        if token is None:
            token = self.token
        if token is None or token == '':
            return None
        if token in self.key_word:
            return 'keyword'
        if token[0] == '"':
            return 'stringConstant'
        if re.match(r"\d+", token):
            return 'integerConstant'
        if token in self.symbols:
            return 'symbol'
        return 'identifier'

    def advance(self):
        """Read the next token into ``self.token``.

        String constants keep their surrounding double quotes.  At end of
        input the token becomes ``''``.

        Raises:
            ValueError: if a string constant has no closing quote.
        """
        text = self.file
        # ``\s*`` always matches, possibly with zero width.
        pos = self._WHITESPACE.match(text, self.i).end()
        if pos >= len(text):
            self.i = len(text)
            self.token = ''
            return

        char = text[pos]
        if char in self.symbols:
            end = pos + 1
        elif char == '"':
            closing = text.find('"', pos + 1)
            if closing == -1:
                raise ValueError('unterminated string constant')
            end = closing + 1
        else:
            word = self._WORD.match(text, pos)
            # A character that is neither a symbol nor part of a word is
            # returned on its own so the scan always moves forward.
            end = word.end() if word else pos + 1
        self.i = end
        self.token = text[pos:end]

    @staticmethod
    def _drop_comment(match):
        """Keep string constants; replace a comment with a single space."""
        found = match.group(0)
        return found if found.startswith('"') else ' '

    def clear_file(self, directory):
        """Load the file at ``directory`` with all comments removed.

        Line comments, ``/* ... */`` and ``/** ... */`` comments are replaced
        by a space so neighbouring tokens are never joined.  The scan
        position and current token are reset.
        """
        with open(directory, "r") as my_file:
            txt = my_file.read()
        self.file = self._STRING_OR_COMMENT.sub(self._drop_comment, txt)
        self.i = 0
        self.token = ''
+75
View File
@@ -0,0 +1,75 @@
class VMWriter:
    """Writes VM commands to a file and echoes each one to stdout."""

    # Operators that translate to a single VM instruction.
    _INSTRUCTIONS = {
        '+': 'add',
        '-': 'sub',
        'neg': 'neg',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or',
        '~': 'not',
    }
    # Operators that are compiled as a two-argument call.
    _ROUTINES = {
        '*': 'Math.multiply',
        '/': 'Math.divide',
    }

    def __init__(self, vm_path):
        self.my_vm = open(vm_path, "w+")
        self.label_index = 0

    def _emit(self, line):
        """Write ``line`` plus a newline to the file, then print it."""
        self.my_vm.write(f'{line}\n')
        print(line)

    def write_push(self, segment, index):
        """Emit ``push segment index``."""
        self._emit(f'push {segment} {index}')

    def write_lable(self, label):
        """Emit ``label L<label>``."""
        self._emit(f'label L{label}')

    def write_arithmetic(self, command):
        """Emit the instruction or call for ``command``; ignore unknown ones."""
        instruction = self._INSTRUCTIONS.get(command)
        if instruction is not None:
            self._emit(instruction)
            return
        routine = self._ROUTINES.get(command)
        if routine is not None:
            self.write_call(routine, 2)

    def write_pop(self, segment, index):
        """Emit ``pop segment index`` (echoed before it is written)."""
        text = f'pop {segment} {index}'
        print(text)
        self.my_vm.write(f'{text}\n')

    def write_goto(self, label):
        """Emit ``goto L<label>``."""
        self._emit(f'goto L{label}')

    def write_if(self, label):
        """Emit ``not`` followed by ``if-goto L<label>``."""
        self.write_arithmetic('~')
        self._emit(f'if-goto L{label}')

    def write_call(self, label, num_args):
        """Emit ``call label num_args``."""
        self._emit(f'call {label} {num_args}')

    def write_function(self, label, num_locals):
        """Emit ``function label num_locals``."""
        self._emit(f'function {label} {num_locals}')

    def write_return(self, func_type):
        """Emit ``return``, preceded by ``push constant 0`` for ``void``."""
        if func_type == 'void':
            self.write_push('constant', '0')
        self._emit('return')

    def close_vm_file(self):
        """Close the output file."""
        self.my_vm.close()