language

Some fool's attempt at an interpreted language

commit e35b2ed9a0cdad6df2f4c2860ac4c237c674ec3f
parent ae7f80e5114b977dee545b4ebfd631c7f21d25b9
Author: Paul Longtine <paullongtine@gmail.com>
Date:   Tue Apr  5 14:14:21 2016

More progress on compilerthing.

Diffstat:
 .gitignore                                |   2 +-
 src/lc/interpreter.py                     | 114 +++++++++++++++++-
 src/lc/lexer.py                           | 185 ++++++++++++++++++++++++++++-
 src/lc/main.py                            |   5 +-
 src/lc/parser.py                          | 208 +++++++++++++++++++++++++++++++-
 src/lc/test_files/simple.ti               |   4 +-
 src/lc/test_files/testing.ti              |  27 ++++-
 src/lexer_prototype/interpreter.py        |  28 +----
 src/lexer_prototype/lexer.py              | 176 +--------------------------
 src/lexer_prototype/main.py               |   7 +-
 src/lexer_prototype/parser.py             | 193 +-----------------------------
 src/lexer_prototype/test_files/simple.ti  |   4 +-
 src/lexer_prototype/test_files/testing.ti |  29 +----
 13 files changed, 545 insertions(+), 437 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,5 @@
 *.o
 toi
+
+*.pyc
diff --git a/src/lc/interpreter.py b/src/lc/interpreter.py
@@ -0,0 +1,114 @@
+from parser import *
+
+class AbstractToken():
+    def __init__(self, interpreter_instance, raw_data):
+        self.data = raw_data
+        self.expr = []
+        self.i = interpreter_instance
+        self.line = self.i.line
+        self.update()
+
+    def update(self):
+        pass
+
+    def action(self):
+        pass
+
+class Label(AbstractToken):
+    def update(self):
+        pass
+
+    def action(self):
+        pass
+
+class Arguements(AbstractToken):
+    def update(self):
+        tokens = []
+        tmp = []
+        capturing = False
+        esc_chars = [["[", "("], ["]", ")"]]
+        for x in self.data[1:-1]:
+            if x in esc_chars[0]:
+                capturing = esc_chars[0].index(x)
+                tmp.append(x)
+            elif x in esc_chars[1]:
+                if esc_chars[1][capturing] == x:
+                    capturing = False
+                tmp.append(x)
+            elif x != ",":
+                tmp.append(x)
+
+            if x == "," and not capturing:
+                tokens.append(tmp)
+                tmp = []
+        tokens.append(tmp)
+
+        for t in tokens:
+            self.expr.append(Expression(self.i, t))
+
+    def action(self):
+        for i in self.expr:
+            i.action()
+
+class Type(AbstractToken):
+    def update(self):
+        pass
+
+    def action(self):
+        pass
+
+class Parameters(AbstractToken):
+    def update(self):
+        tmp = []
+        for x in self.data:
+            if x != "(" and x != ")" and x != ",":
+                tmp.append(x)
+            elif x == "," or x == ")":
+                t = Type(self.i, tmp[0])
+                l = Label(self.i, tmp[1])
+                self.expr.append([t, l])
+                tmp = []
+
+    def action(self):
+        pass
+
+class Expression(AbstractToken):
+    def update(self):
+        pass
+
+    def action(self):
+        pass
+
+class Interpreter():
+    def __init__(self, filename):
+        self.p = Parser(filename)
+
+        self.program = self.p.get_statements()
+
+        self.line = (None, None)
+
+        self.contex = {}
+
+        #initalizes values n' stuff
+        for self.line in self.program:
+            self.line.append(self.line[0].action(self))
+
+    def loop_until_ready(self):
+        for self.line in self.program:
+            self.line[2].action()
+
+    def eval_label(self, index):
+        return(Label(self, self.line[1][index][0]))
+
+    def eval_args(self, index):
+        return(Arguements(self, self.line[1][index]))
+
+    def eval_type(self, index):
+        return(Type(self, self.line[1][index]))
+
+    def eval_param(self, index):
+        return(Parameters(self, self.line[1][index]))
+
+    def eval_expr(self, index):
+        return(Expression(self, self.line[1][index]))
+
diff --git a/src/lc/lexer.py b/src/lc/lexer.py
@@ -0,0 +1,185 @@
+import re
+
+class AtomicSymbol():
+    def __init__(self, symbol):
+        self.symbol = re.compile(symbol)
+
+    def match(self, tokenstring, index):
+        return [index + 1, [tokenstring[index]]] \
+                if self.symbol.match(tokenstring[index]) else [False, None]
+
+class CompoundSymbol():
+    def __init__(self, symbols):
+        for x, i in enumerate(symbols):
+            if type(i) is str:
+                symbols[x] = AtomicSymbol(i)
+
+        self.symbols = symbols
+
+    def match(self, tokenstring, index):
+        rv = []
+        for i in self.symbols:
+            r = i.match(tokenstring, index)
+            if r[0]:
+                rv = r[1]
+                index = r[0]
+                break
+
+        return [index, rv] if len(rv) > 0 else [False, None]
+
+class InclusiveSymbol(CompoundSymbol):
+    def match(self, tokenstring, index):
+        rv = []
+        for i in self.symbols:
+            r = i.match(tokenstring, index)
+            if r[0]:
+                rv = r[1]
+                index = r[0]
+                break
+
+        return [index, rv] if len(rv) > 0 else [False, None]
+
+class ExclusiveSymbol(CompoundSymbol):
+    def match(self, tokenstring, index):
+        rv = [tokenstring[index]]
+        for i in self.symbols:
+            r = i.match(tokenstring, index)
+            if r[0]:
+                rv = []
+
+        return [index + 1, rv] if len(rv) > 0 else [False, None]
+
+class PolySymbol():
+    def __init__(self, symbols, terminator=[]):
+        self.symbols = symbols
+        self.terminator = terminator
+
+    def match(self, tokenstring, index):
+        rv = []
+        while index+1 < len(tokenstring):
+            for t in self.terminator:
+                r = t.match(tokenstring, index)
+                if r[0]:
+                    break
+            v = False
+            for s in self.symbols:
+                r = s.match(tokenstring, index)
+                if r[0]:
+                    rv.extend(r[1])
+                    index = r[0]
+                    v = True
+                    break
+            if not v:
+                break
+
+        return [index, rv] if len(rv) > 0 else [False, None]
+
+class GroupingSymbol(PolySymbol):
+    def match(self, tokenstring, index):
+        rv = []
+        r = self.symbols[0].match(tokenstring, index)
+        if r[0]:
+            rv.extend(r[1])
+            index = r[0]
+            ignore = 0
+            while index < len(tokenstring):
+                r = self.symbols[0].match(tokenstring, index)
+                if r[0]:
+                    ignore += 1
+                    rv.extend(r[1])
+                    index = r[0]
+                r = self.symbols[1].match(tokenstring, index)
+                if r[0]:
+                    index = r[0]
+                    rv.extend(r[1])
+                    if not ignore > 0:
+                        break
+                    else:
+                        ignore -= 1
+                else:
+                    rv.append(tokenstring[index])
+                    index += 1
+
+        return [index, rv] if len(rv) > 0 else [False, None]
+
+class Statement():
+    def __init__(self, name, expression=[], init=None):
+        self.name = name
+        self.expr = expression
+        self.action = init
+
+    def match(self, tokenstring):
+        rv = []
+        index = 0
+        for e in self.expr:
+            r = e.match(tokenstring, index)
+            if r[0]:
+                rv.append(r[1])
+                index = r[0]
+            else:
+                break
+
+        return rv if index == len(tokenstring) else False
+
+class Tokenizer():
+    def __init__(self, symbol_delim, statement_delim):
+        self.symbol_delim = symbol_delim
+        self.statement_delim = statement_delim
+        self.symbols = []
+
+    # Based off of self.symbol_delim, and string literals, break code into bits
+    def generate_symbols(self, raw_string):
+        tmp = ""
+
+        #Thing that keeps string literals in tact.
+        in_string = False
+        no_escape = True
+        for char in raw_string:
+            if char == "\\":
+                no_escape = False
+            if char == "\"" and no_escape:
+                if in_string:
+                    tmp = tmp + char
+                    self.symbols.append(tmp)
+                    tmp = ""
+                    in_string = False
+                else:
+                    self.symbols.append(tmp)
+                    tmp = ""
+                    tmp = "\0" + tmp + char
+                    in_string = True
+            else:
+                tmp = tmp + char
+            if char != "\\" and no_escape == False:
+                no_escape = True
+
+        self.symbols.append(tmp)
+
+        # Go and split them codes into symbols!
+        for i in self.symbol_delim:
+            tmp = []
+            for x in self.symbols:
+                if len(x) > 0:
+                    # This checks for the work the above code did
+                    # It prevents string literals from being subdivided
+                    if x[0] != "\0":
+                        tmp.extend(re.split("({})".format(i), x))
+                    else:
+                        tmp.append(x)
+            self.symbols = tmp
+
+    def generate_statements(self):
+        rv = []
+        tmp = []
+        for i in self.symbols:
+            t = i.strip()
+            if len(t) > 0:
+                tmp.append(t)
+
+            for x in self.statement_delim:
+                if x == i:
+                    rv.append(tmp)
+                    tmp = []
+
+        return rv
+
diff --git a/src/lc/main.py b/src/lc/main.py
@@ -0,0 +1,5 @@
+from interpreter import *
+if __name__ == "__main__":
+    import sys
+
+    t = Interpreter(sys.argv[1])
diff --git a/src/lc/parser.py b/src/lc/parser.py
@@ -0,0 +1,208 @@
+from lexer import *
+
+class Parser():
+    def __init__(self, file_name):
+        self.splitters = [
+            ":",
+            ";",
+            "\(",
+            "\)",
+            "\[",
+            "\]",
+            "{",
+            "}",
+            ",",
+            " "
+        ]
+        self.end_statements = [
+            ":",
+            ";",
+            "{",
+            "}"
+        ]
+
+        self.known_tokens = [
+            "if",
+            "else",
+            "for",
+            "while",
+            "func"
+        ]
+
+        self.defined_types = [
+            "int",
+            "float",
+            "array",
+            "object",
+            "ptr"
+        ]
+
+        self.number_def = AtomicSymbol("[0-9]+")
+
+        self.type_def = InclusiveSymbol(self.defined_types)
+        self.label_def = ExclusiveSymbol(self.defined_types +
+                                         self.known_tokens )
+
+        self.paramlist_def = GroupingSymbol( [
+            AtomicSymbol("\("),
+            AtomicSymbol("\)")
+        ] )
+
+        self.expr_def = PolySymbol( [
+            self.label_def,
+            self.number_def,
+            AtomicSymbol("\("),
+            AtomicSymbol("\)"),
+            AtomicSymbol("\+"),
+            AtomicSymbol("\-"),
+            AtomicSymbol("\*"),
+            AtomicSymbol("\/"),
+            AtomicSymbol("\>"),
+            AtomicSymbol("\<"),
+            AtomicSymbol("=\<"),
+            AtomicSymbol("\>="),
+            AtomicSymbol("=="),
+            AtomicSymbol("\""),
+            AtomicSymbol("'")
+        ], terminator=[
+            AtomicSymbol(";"),
+            AtomicSymbol(":")
+        ])
+
+        self.active_tokens = [
+            Statement(
+                "codeblock_begin",
+                expression=[
+                    AtomicSymbol("{")
+                ],
+                init=(lambda x: [])
+            ),
+            Statement(
+                "codeblock_end",
+                expression=[
+                    AtomicSymbol("}")
+                ],
+                init=(lambda x: [])
+            ),
+            Statement(
+                "if",
+                expression=[
+                    AtomicSymbol("if"),
+                    self.expr_def,
+                    AtomicSymbol(":")
+                ],
+                init=(lambda x: [x.eval_expr(1)])
+            ),
+            Statement(
+                "else",
+                expression=[
+                    AtomicSymbol("else"),
+                    AtomicSymbol(":")
+                ],
+                init=(lambda x: [])
+            ),
+            Statement(
+                "for",
+                expression=[
+                    AtomicSymbol("for"),
+                    self.expr_def,
+                    AtomicSymbol(":")
+                ],
+                init=(lambda x: [x.eval_expr(1)])
+            ),
+            Statement(
+                "while",
+                expression=[
+                    AtomicSymbol("while"),
+                    self.expr_def,
+                    AtomicSymbol(":")
+                ],
+                init=(lambda x: [x.eval_expr(1)])
+            ),
+            Statement(
+                "function",
+                expression=[
+                    AtomicSymbol("func"),
+                    self.label_def,
+                    self.paramlist_def,
+                    AtomicSymbol("->"),
+                    self.type_def,
+                    AtomicSymbol(":")
+                ],
+                init=(
+                    lambda x: [
+                        x.eval_label(1),
+                        x.eval_param(2),
+                        x.eval_type(4)
+                    ])
+            ),
+            Statement(
+                "instantiation",
+                expression=[
+                    self.type_def,
+                    self.label_def,
+                    AtomicSymbol("="),
+                    self.expr_def,
+                    AtomicSymbol(";")
+                ],
+                init=(lambda x: [
+                    x.eval_type(0),
+                    x.eval_label(1),
+                    x.eval_expr(3)
+                ])
+            ),
+            Statement(
+                "assignment",
+                expression=[
+                    self.label_def,
+                    AtomicSymbol("="),
+                    self.expr_def,
+                    AtomicSymbol(";")
+                ],
+                init=(lambda x: [
+                    x.eval_label(0),
+                    x.eval_expr(2)
+                ])
+            ),
+            Statement(
+                "func_call",
+                expression=[
+                    self.label_def,
+                    self.paramlist_def,
+                    AtomicSymbol(";")
+                ],
+                init=(lambda x: [
+                    x.eval_label(0),
+                    x.eval_args(1)
+                ])
+            ),
+            Statement(
+                "expression",
+                expression=[
+                    self.expr_def,
+                    AtomicSymbol(";")
+                ],
+                init=(lambda x: [x.eval_expr(0)])
+            )
+        ]
+        data=""
+        with open(file_name, 'r') as program:
+            data=program.read().replace('\n', '')
+
+        self.symbols = Tokenizer(self.splitters, self.end_statements)
+
+        self.symbols.generate_symbols(data)
+
+        self.lines = self.symbols.generate_statements()
+
+    def get_statements(self):
+        rv = []
+        for l in self.lines:
+            for a in self.active_tokens:
+                r = a.match(l)
+                if r:
+                    rv.append([a,r])
+                    break
+
+        return rv
+
diff --git a/src/lc/test_files/simple.ti b/src/lc/test_files/simple.ti
@@ -0,0 +1,4 @@
+func testing (int i, int x) -> int:
+{
+    return(0);
+}
diff --git a/src/lc/test_files/testing.ti b/src/lc/test_files/testing.ti
@@ -0,0 +1,27 @@
+func some_important_function (int x, int y, int z) -> int:
+{
+    int c = (x * y) + z;
+    if c > 3:
+    {
+        c = c - 3;
+    }
+    return(c);
+}
+
+int x = 3;
+
+array y = [ "Symbols suck", 32, "I know, right?" ];
+
+if x == 3:
+{
+    print("Potatoes are good for \"the\" soul.");
+}
+
+for x in test(y):
+{
+    print(x);
+}
+
+some_important_function(3, 4, some_important_function(2, 3));
+
+test(test());
diff --git a/src/lexer_prototype/interpreter.py b/src/lexer_prototype/interpreter.py
@@ -1,28 +0,0 @@
-from parser import *
-
-class Interpreter():
-    def __init__(self, filename):
-        self.p = Parser(filename)
-
-        self.program = self.p.get_statements()
-
-        self.line = (None, None)
-
-        for self.line in self.program:
-            self.line[0].action(self)
-
-    def eval_label(self, index):
-        return(self.line)
-
-    def eval_args(self, index):
-        return(self.line)
-
-    def eval_type(self, index):
-        return(self.line)
-
-    def eval_param(self, index):
-        return(self.line)
-
-    def eval_expr(self, index):
-        return(self.line)
-
diff --git a/src/lexer_prototype/lexer.py b/src/lexer_prototype/lexer.py
@@ -1,176 +0,0 @@
-import re
-
-class AtomicSymbol():
-    def __init__(self, symbol):
-        self.symbol = re.compile(symbol)
-
-    def match(self, tokenstring, index):
-        return [index + 1, [tokenstring[index]]] \
-                if self.symbol.match(tokenstring[index]) else [False, None]
-
-class CompoundSymbol():
-    def __init__(self, symbols):
-        for x, i in enumerate(symbols):
-            if type(i) is str:
-                symbols[x] = AtomicSymbol(i)
-
-        self.symbols = symbols
-
-    def match(self, tokenstring, index):
-        rv = []
-        for i in self.symbols:
-            r = i.match(tokenstring, index)
-            if r[0]:
-                rv = r[1]
-                index = r[0]
-                break
-
-        return [index, rv] if len(rv) > 0 else [False, None]
-
-class InclusiveSymbol(CompoundSymbol):
-    def match(self, tokenstring, index):
-        rv = []
-        for i in self.symbols:
-            r = i.match(tokenstring, index)
-            if r[0]:
-                rv = r[1]
-                index = r[0]
-                break
-
-        return [index, rv] if len(rv) > 0 else [False, None]
-
-class ExclusiveSymbol(CompoundSymbol):
-    def match(self, tokenstring, index):
-        rv = [tokenstring[index]]
-        for i in self.symbols:
-            r = i.match(tokenstring, index)
-            if r[0]:
-                rv = []
-
-        return [index + 1, rv] if len(rv) > 0 else [False, None]
-
-class PolySymbol():
-    def __init__(self, symbols, terminator=[]):
-        self.symbols = symbols
-        self.terminator = terminator
-
-    def match(self, tokenstring, index):
-        rv = []
-        while index+1 < len(tokenstring):
-            for t in self.terminator:
-                r = t.match(tokenstring, index)
-                if r[0]:
-                    break
-            v = False
-            for s in self.symbols:
-                r = s.match(tokenstring, index)
-                if r[0]:
-                    rv.extend(r[1])
-                    index = r[0]
-                    v = True
-                    break
-            if not v:
-                break
-
-        return [index, rv] if len(rv) > 0 else [False, None]
-
-class GroupingSymbol(PolySymbol):
-    def match(self, tokenstring, index):
-        rv = []
-        r = self.symbols[0].match(tokenstring, index)
-        if r[0]:
-            rv.extend(r[1])
-            index = r[0]
-            while index < len(tokenstring):
-                r = self.symbols[1].match(tokenstring, index)
-                if r[0]:
-                    index = r[0]
-                    rv.extend(r[1])
-                    break
-                else:
-                    rv.append(tokenstring[index])
-                    index += 1
-
-        return [index, rv] if len(rv) > 0 else [False, None]
-
-class Statement():
-    def __init__(self, name, expression=[], func=None):
-        self.name = name
-        self.expr = expression
-        self.action = func
-
-    def match(self, tokenstring):
-        rv = []
-        index = 0
-        for e in self.expr:
-            r = e.match(tokenstring, index)
-            if r[0]:
-                rv.append(r[1])
-                index = r[0]
-            else:
-                break
-
-        return rv if index == len(tokenstring) else False
-
-class Tokenizer():
-    def __init__(self, symbol_delim, statement_delim):
-        self.symbol_delim = symbol_delim
-        self.statement_delim = statement_delim
-        self.symbols = []
-
-    # Based off of self.symbol_delim, and string literals, break code into bits
-    def generate_symbols(self, raw_string):
-        tmp = ""
-
-        #Thing that keeps string literals in tact.
-        in_string = False
-        no_escape = True
-        for char in raw_string:
-            if char == "\\":
-                no_escape = False
-            if char == "\"" and no_escape:
-                if in_string:
-                    tmp = tmp + char
-                    self.symbols.append(tmp)
-                    tmp = ""
-                    in_string = False
-                else:
-                    self.symbols.append(tmp)
-                    tmp = ""
-                    tmp = "\0" + tmp + char
-                    in_string = True
-            else:
-                tmp = tmp + char
-            if char != "\\" and no_escape == False:
-                no_escape = True
-
-        self.symbols.append(tmp)
-
-        # Go and split them codes into symbols!
-        for i in self.symbol_delim:
-            tmp = []
-            for x in self.symbols:
-                if len(x) > 0:
-                    # This checks for the work the above code did
-                    # It prevents string literals from being subdivided
-                    if x[0] != "\0":
-                        tmp.extend(re.split("({})".format(i), x))
-                    else:
-                        tmp.append(x)
-            self.symbols = tmp
-
-    def generate_statements(self):
-        rv = []
-        tmp = []
-        for i in self.symbols:
-            t = i.strip()
-            if len(t) > 0:
-                tmp.append(t)
-
-            for x in self.statement_delim:
-                if x == i:
-                    rv.append(tmp)
-                    tmp = []
-
-        return rv
-
diff --git a/src/lexer_prototype/main.py b/src/lexer_prototype/main.py
@@ -1,7 +0,0 @@
-from parser import *
-if __name__ == "__main__":
-    import sys
-
-    thing = Parser(sys.argv[1])
-    for i in thing.get_statements():
-        print(i)
diff --git a/src/lexer_prototype/parser.py b/src/lexer_prototype/parser.py
@@ -1,193 +0,0 @@
-from lexer import *
-
-class Parser():
-    def __init__(self, file_name):
-        self.splitters = [
-            ":",
-            ";",
-            "\(",
-            "\)",
-            "\[",
-            "\]",
-            "{",
-            "}",
-            ",",
-            " "
-        ]
-        self.end_statements = [
-            ":",
-            ";",
-            "{",
-            "}"
-        ]
-
-        self.known_tokens = [
-            "if",
-            "else",
-            "for",
-            "while",
-            "func"
-        ]
-
-        self.defined_types = [
-            "int",
-            "float",
-            "array",
-            "object",
-            "ptr"
-        ]
-
-        self.number_def = AtomicSymbol("[0-9]+")
-
-        self.type_def = InclusiveSymbol(self.defined_types)
-        self.label_def = ExclusiveSymbol(self.defined_types +
-                                         self.known_tokens )
-
-        self.paramlist_def = GroupingSymbol( [
-            AtomicSymbol("\("),
-            AtomicSymbol("\)")
-        ] )
-
-        self.expr_def = PolySymbol( [
-            self.label_def,
-            self.number_def,
-            AtomicSymbol("\("),
-            AtomicSymbol("\)"),
-            AtomicSymbol("\+"),
-            AtomicSymbol("\-"),
-            AtomicSymbol("\*"),
-            AtomicSymbol("\/"),
-            AtomicSymbol("\>"),
-            AtomicSymbol("\<"),
-            AtomicSymbol("=\<"),
-            AtomicSymbol("\>="),
-            AtomicSymbol("=="),
-            AtomicSymbol("\""),
-            AtomicSymbol("'")
-        ], terminator=[
-            AtomicSymbol(";"),
-            AtomicSymbol(":")
-        ])
-
-        self.active_tokens = [
-            Statement(
-                "codeblock_begin",
-                expression=[
-                    AtomicSymbol("{")
-                ],
-                func=(lambda x: [])
-            ),
-            Statement(
-                "codeblock_end",
-                expression=[
-                    AtomicSymbol("}")
-                ],
-                func=(lambda x: [])
-            ),
-            Statement(
-                "if",
-                expression=[
-                    AtomicSymbol("if"),
-                    self.expr_def,
-                    AtomicSymbol(":")
-                ],
-                func=(lambda x: [x.eval_expr(1)])
-            ),
-            Statement(
-                "else",
-                expression=[
-                    AtomicSymbol("else"),
-                    AtomicSymbol(":")
-                ],
-                func=(lambda x: [])
-            ),
-            Statement(
-                "for",
-                expression=[
-                    AtomicSymbol("for"),
-                    self.expr_def,
-                    AtomicSymbol(":")
-                ],
-                func=(lambda x: [x.eval_expr(1)])
-            ),
-            Statement(
-                "while",
-                expression=[
-                    AtomicSymbol("while"),
-                    self.expr_def,
-                    AtomicSymbol(":")
-                ],
-                func=(lambda x: [x.eval_expr(1)])
-            ),
-            Statement(
-                "function",
-                expression=[
-                    AtomicSymbol("func"),
-                    self.label_def,
-                    self.paramlist_def,
-                    AtomicSymbol("->"),
-                    self.type_def,
-                    AtomicSymbol(":")
-                ],
-                func=(lambda x: [x.eval_label(1), x.eval_param(2), x.eval_type(5)])
-            ),
-            Statement(
-                "instantiation",
-                expression=[
-                    self.type_def,
-                    self.label_def,
-                    AtomicSymbol("="),
-                    self.expr_def,
-                    AtomicSymbol(";")
-                ],
-                func=(lambda x: [x.eval_type(0), x.eval_label(1), x.eval_expr(3)])
-            ),
-            Statement(
-                "assignment",
-                expression=[
-                    self.label_def,
-                    AtomicSymbol("="),
-                    self.expr_def,
-                    AtomicSymbol(";")
-                ],
-                func=(lambda x: [x.eval_label(0), x.eval_expr(2)])
-            ),
-            Statement(
-                "func_call",
-                expression=[
-                    self.label_def,
-                    self.paramlist_def,
-                    AtomicSymbol(";")
-                ],
-                func=(lambda x: [x.eval_label(0), x.eval_args(1)])
-            ),
-            Statement(
-                "expression",
-                expression=[
-                    self.expr_def,
-                    AtomicSymbol(";")
-                ],
-                func=(lambda x: [x.eval_expr(0)])
-            )
-        ]
-        data=""
-        with open(file_name, 'r') as program:
-            data=program.read().replace('\n', '')
-
-        self.symbols = Tokenizer(self.splitters, self.end_statements)
-
-        self.symbols.generate_symbols(data)
-
-        self.lines = self.symbols.generate_statements()
-
-    def get_statements(self):
-        rv = []
-        for l in self.lines:
-            for a in self.active_tokens:
-                r = a.match(l)
-                if r:
-                    rv.append((a,r))
-                    break
-
-        return rv
-
diff --git a/src/lexer_prototype/test_files/simple.ti b/src/lexer_prototype/test_files/simple.ti
@@ -1,4 +0,0 @@
-func testing (int i, int x) -> int:
-{
-    return(0);
-}
diff --git a/src/lexer_prototype/test_files/testing.ti b/src/lexer_prototype/test_files/testing.ti
@@ -1,29 +0,0 @@
-func some_important_function (int x, int y, int z) -> int:
-{
-    int c = (x * y) + z;
-    if c > 3:
-    {
-        c = c - 3;
-    }
-    return(c);
-}
-
-int x = 3;
-
-array y = [ "Symbols suck", 32, "I know, right?" ];
-
-if x == 3:
-{
-    print("Potatoes are good for \"the\" soul.");
-}
-
-for x in y:
-{
-    print(x);
-}
-
-some_important_function(3, 4, 2);
-
-ptr something *= ObjectClass();
-
-something.potato();
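
A quick usage sketch (not part of this commit's diff): the snippet below shows one way to drive the new Tokenizer from src/lc/lexer.py by hand, mirroring what Parser.__init__ does internally. It assumes it is run from inside src/lc/ so the bare `lexer` import resolves; the delimiter lists are copied from src/lc/parser.py, and the file name tokenizer_demo.py plus the sample source line are made up for illustration.

    # tokenizer_demo.py (hypothetical) -- run from within src/lc/
    from lexer import Tokenizer

    # Same delimiter lists that Parser.__init__ passes to Tokenizer.
    splitters = [":", ";", "\(", "\)", "\[", "\]", "{", "}", ",", " "]
    end_statements = [":", ";", "{", "}"]

    t = Tokenizer(splitters, end_statements)
    t.generate_symbols('int x = 3;')
    # generate_statements() flushes a token list at every end_statements
    # delimiter, so this should print something like:
    #     ['int', 'x', '=', '3', ';']
    for statement in t.generate_statements():
        print(statement)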