language

Some fool's attempt at an interpreted language
Log | Files | Refs

commit 60bc79a2a1d45616b3d08a2847d575bcd2171199
parent 6cc46e9726906b259c377a1fcfd0ed507630b9a2
Author: Paul Longtine <paullongtine@gmail.com>
Date:   Sun Apr 24 20:41:56 2016

Eh, did stuff. Mainly properly supporting <positive> integer serialization

Diffstat:
 .gitignore                |  2 +-
 src/lc/bytecode.py        | 31 +++++++++++----
 src/lc/helper.py          | 52 +++++++++++++++++++++++++-
 src/lc/interpreter.py     | 72 ++++++++---------------------------
 src/lc/main.py            | 14 +++++--
 src/lc/parser.py          |  3 +-
 src/lc/test_files/fibb.ti |  2 +-
 src/lc/test_files/if.ti   | 11 +++++-
 src/vm/inc/var_ops.h      |  8 ++++-
 src/vm/src/ins_def.c      | 22 +++++++++--
 src/vm/src/proc.c         |  2 +-
 src/vm/src/var_ops.c      | 98 ++++++++++++++++++++++++++++++++++++++++++++++++-
 12 files changed, 243 insertions(+), 74 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -4,3 +4,5 @@ toi *.pyc + +__pycache__ diff --git a/src/lc/bytecode.py b/src/lc/bytecode.py @@ -1,4 +1,5 @@ from memonic import * +from helper import * class VariableNew(): def __init__(self, label, typed): @@ -64,6 +65,7 @@ class FunctionCall(): self.label.action() ]) +#TODO: Implement this class ForLoop(): def __init__(self, expression): self.expr = expression @@ -77,25 +79,29 @@ class SerializeableType(): class StringConstant(SerializeableType): def __init__(self, value): - self.value = value[1:-1] - + self.value = [] + for i in value: + self.value.append(ord(i)) + def action(self): return([ - len(value), + OP_CTS, + int_to_bytes(len(self.value) + 1), 0x00, - value + 0x09, + self.value ]) class IntegerConstant(SerializeableType): def __init__(self, value): - self.value = int(value[0]) - if self.value > 0xFF: - print("ONE BYTE PLEASE THIS IS A PROTOTYPE!!") + t = int(value[0]) + self.raw = t + self.value = int_to_bytes(t) def action(self): return([ OP_CTS, - 0x02, + int_to_bytes(len(self.value) + 1), 0x00, 0x06, self.value @@ -112,3 +118,12 @@ class BinaryOp(): self.vals[1].action(), self.op.action() ]) + +class Opcode(): + def __init__(self, opcode): + self.opcode = opcode + + def action(self): + return(self.opcode) + + diff --git a/src/lc/helper.py b/src/lc/helper.py @@ -0,0 +1,52 @@ + +# This function splits a tokenstring by split characters, along with providing +# a way to preserve capturing groups via esc_chars +# An example call would be as follows: +# +# token_split( ["Hello", " ", "(", "Wonderful", " ", "World", ")"], +# [ ["("], [")"] ], +# [" "] ) +# +# Would return: +# [ [ "Hello" ], ["(", "Wonderful", " ", "World", ")" ] ] +def token_split(tokenstring, esc_chars, split_chars, include_splitter=True): + tokens = [] + tmp = [] + capturing = False + for x in tokenstring: + if x in esc_chars[0]: + capturing = esc_chars[0].index(x) + tmp.append(x) + elif x in esc_chars[1]: + if esc_chars[1][capturing] == x: + 
capturing = False + tmp.append(x) + elif include_splitter or not x in split_chars or capturing: + tmp.append(x) + + if x in split_chars and not capturing: + tokens.append(tmp) + tmp = [] + if len(tmp) > 0: + tokens.append(tmp) + + return tokens + +def int_to_bytes(number): + rv = [] + c = 0 + while (number / (0xFF << (8*c))) > 1: + rv.insert(0, (number & (0xFF << (8*c))) >> (8*c)) + c += 1 + + rv.insert(0, (number & (0xFF << (8*c))) >> (8*c)) + + return rv + +def int_to_word(number): + rv = [0, 0] + + rv[0] = (number & 0xFF00) >> 8 + rv[1] = number & 0x00FF + + return rv diff --git a/src/lc/interpreter.py b/src/lc/interpreter.py @@ -2,29 +2,7 @@ from parser import * from lexer import * from bytecode import * from memonic import * - -def token_split(tokenstring, esc_chars, split_chars, include_splitter=True): - tokens = [] - tmp = [] - capturing = False - for x in tokenstring: - if x in esc_chars[0]: - capturing = esc_chars[0].index(x) - tmp.append(x) - elif x in esc_chars[1]: - if esc_chars[1][capturing] == x: - capturing = False - tmp.append(x) - elif include_splitter or not x in split_chars or capturing: - tmp.append(x) - - if x in split_chars and not capturing: - tokens.append(tmp) - tmp = [] - if len(tmp) > 0: - tokens.append(tmp) - - return tokens +from helper import * class AbstractToken(): def __init__(self, interpreter_instance, raw_data): @@ -43,7 +21,6 @@ class AbstractToken(): class Label(AbstractToken): def update(self): f = lambda y, x: y(y, x[0]) if type(x) is list else x - self.data = f(f, self.data) self.scope = 0 if self.i.scope > 0 else 1 @@ -58,7 +35,7 @@ class Label(AbstractToken): if scope: return(self.scope) else: - return([0x00, self.expr]) + return(int_to_word(self.expr)) class Arguements(AbstractToken): def update(self): @@ -105,9 +82,9 @@ class Parameters(AbstractToken): tmp = [] def action(self): - types = map(lambda x: x[0].action(), self.expr) + types = list(map(lambda x: x[0].action(), self.expr)) return([ - len(types), + 
int_to_bytes(len(types)), 0x0, types ]) @@ -115,6 +92,7 @@ class Parameters(AbstractToken): class Expression(AbstractToken): def update(self): + self.expr = None self.operators = [ ["+", Opcode(OP_ADD)], ["-", Opcode(OP_SUB)], @@ -127,7 +105,7 @@ class Expression(AbstractToken): ["=<", Opcode(OP_LTHAN_EQ)] ] - self.operator_names = map(lambda x: x[0], self.operators) + self.operator_names = list(map(lambda x: x[0], self.operators)) self.func_call = Statement( "func_call", @@ -145,13 +123,6 @@ class Expression(AbstractToken): ], init=(lambda x,y: Expression(x, y[1:-1])) ) - self.string = Statement( - "string", - expression=[ - AtomicSymbol("^\0") - ], - init=(lambda x,y: StringConstant(y)) - ) self.integer = Statement( "integer", expression=[ @@ -170,7 +141,6 @@ class Expression(AbstractToken): self.identifiers = [ self.func_call, self.subexpr, - self.string, self.integer, self.label ] @@ -184,7 +154,6 @@ class Expression(AbstractToken): t = self.data if len(t) > 2: print("Expression Error ({})".format(self.data)) - return False next_op = False for thing in t: @@ -196,38 +165,31 @@ class Expression(AbstractToken): op = None obj = None - for i in self.identifiers: - r = i.match(ex) - if r: - obj = i.action(self.i, ex) + if ex[0][0] == "\0": + obj = StringConstant(ex[0][2:-1]) + else: + for i in self.identifiers: + r = i.match(ex) + if r: + obj = i.action(self.i, ex) if obj == None: print("Unknown Expression Error ({})".format(ex)) break if next_op: - self.expr[-1].vals.append(obj) + self.expr.vals.append(obj) next_op = False else: if op in self.operator_names: index = self.operator_names.index(op) - self.expr.append(BinaryOp(obj, self.operators[index][1])) + self.expr = BinaryOp(obj, self.operators[index][1]) next_op = True else: - self.expr.append(obj) - + self.expr = obj def action(self): - return([ - self.expr[0].action() - ]) - -class Opcode(): - def __init__(self, opcode): - self.opcode = opcode - - def action(self): - return([self.opcode]) + 
return(self.expr.action()); class Directive(): def __init__(self, function, conditional): diff --git a/src/lc/main.py b/src/lc/main.py @@ -1,16 +1,21 @@ from interpreter import * + def tobytearray(l, n, ba): for i in l: if type(i) is list: - n += 1 ba = tobytearray(i, n, ba) + n += 1 else: + if type(i) is bytes: + i = int.from_bytes(i, byteorder='big') + if type(i) is int: + ba.append(i) + print((" "*n)+hex(i)) - ba.append(i) return(ba) - + if __name__ == "__main__": import sys @@ -20,7 +25,7 @@ if __name__ == "__main__": itr = Interpreter(sys.argv[1]) - out = file(sys.argv[2], "w") + out = open(sys.argv[2], "wb") rv = [] for l in itr.program: @@ -31,3 +36,4 @@ if __name__ == "__main__": program = tobytearray(rv, 0, program) out.write(program) + out.close() diff --git a/src/lc/parser.py b/src/lc/parser.py @@ -83,7 +83,8 @@ class Parser(): AtomicSymbol("\>="), AtomicSymbol("=="), AtomicSymbol("\""), - AtomicSymbol("'") + AtomicSymbol("'"), + AtomicSymbol("\0") ], terminator=[ AtomicSymbol(";"), AtomicSymbol(":") diff --git a/src/lc/test_files/fibb.ti b/src/lc/test_files/fibb.ti @@ -1,7 +1,7 @@ int a = 0; int b = 1; -while a > 254: +while a > 100000000: { a = a + b; print b; diff --git a/src/lc/test_files/if.ti b/src/lc/test_files/if.ti @@ -9,6 +9,17 @@ int x = 3; if 3 == x: { x = x + 1; + + if 4 == x: + { + print "I will die"; + } else if 5 == x: + { + print "If this works"; + } else: + { + print "God help me"; + } } print test(x); diff --git a/src/vm/inc/var_ops.h b/src/vm/inc/var_ops.h @@ -36,6 +36,14 @@ var_cont* var_lthan_float(var_cont*, var_cont*); var_cont* var_lthan_int(var_cont*, var_cont*); var_cont* var_lthan(var_cont*, var_cont*); +var_cont* var_gthan_eq_float(var_cont*, var_cont*); +var_cont* var_gthan_eq_int(var_cont*, var_cont*); +var_cont* var_gthan_eq(var_cont*, var_cont*); + +var_cont* var_lthan_eq_float(var_cont*, var_cont*); +var_cont* var_lthan_eq_int(var_cont*, var_cont*); +var_cont* var_lthan_eq(var_cont*, var_cont*); + var_cont* 
var_eq_float(var_cont*, var_cont*); var_cont* var_eq_int(var_cont*, var_cont*); var_cont* var_eq(var_cont*, var_cont*); diff --git a/src/vm/src/ins_def.c b/src/vm/src/ins_def.c @@ -351,7 +351,9 @@ void _ins_def_GTHAN_EQ (rt_t* ctx, bc_cont* line) var_cont* A = stk_pop(ctx->stack); var_cont* B = stk_pop(ctx->stack); - // TODO + var_cont* C = var_gthan_eq(A, B); + + stk_push(ctx->stack, C); pc_inc(ctx->pc, 1); } @@ -360,7 +362,9 @@ void _ins_def_LTHAN_EQ (rt_t* ctx, bc_cont* line) var_cont* A = stk_pop(ctx->stack); var_cont* B = stk_pop(ctx->stack); - // TODO + var_cont* C = var_gthan_eq(A, B); + + stk_push(ctx->stack, C); pc_inc(ctx->pc, 1); } @@ -443,10 +447,18 @@ void _ins_def_IFDO (rt_t* ctx, bc_cont* line) // If the value is false, find an ELSE statement or DONE statement. if (value < 1) { + int level = 0; while (pc_safe(ctx->pc)) { pc_update(ctx->pc); pc_inc(ctx->pc, 1); + + // Is this instruction another IF statement? + if (ctx->pc->line->op == 0x72) + { + // Increment the if statement depth counter + level++; + } else // Is the instruction an ELSE statement? if (ctx->pc->line->op == 0x73) { @@ -456,8 +468,10 @@ void _ins_def_IFDO (rt_t* ctx, bc_cont* line) // Is the instruction a DONE statement? if (ctx->pc->line->op == 0x7E) { - // We're done here. 
- break; + // And we're not in another if statement, we're done here + if (level == 0) break; + + level--; } } } else diff --git a/src/vm/src/proc.c b/src/vm/src/proc.c @@ -34,7 +34,7 @@ void proc_run(rt_t* ctx) for (n = 0; pc_safe(ctx->pc); pc_update(ctx->pc)) { -// printf("%i - %i: %x\n", n, ctx->pc->address, ctx->pc->line->op); +//printf("%i - %i: %x\n", n, ctx->pc->address, ctx->pc->line->op); INS_DEF[ctx->pc->line->op](ctx, ctx->pc->line); diff --git a/src/vm/src/var_ops.c b/src/vm/src/var_ops.c @@ -12,6 +12,7 @@ void var_pprint(var_cont* var) { var_data_str* data = var->data; for (int i = 0; i < data->size; i++) printf("%c", data->v[i]); + printf("\n"); } else if (var->type == G_INT) { @@ -308,6 +309,103 @@ var_cont* var_lthan(var_cont* A, var_cont* B) return rv; } +var_cont* var_gthan_eq_float(var_cont* A, var_cont* B) +{ + var_cont* var = var_new(G_INT); + double AV = var_data_get_G_FLOAT(A); + double BV = var_data_get_G_FLOAT(B); + + int S = (AV >= BV); + + var_data_int* data = var_data_alloc_G_INT(S); + + var_set(var, data, G_INT); + + return var; +} +var_cont* var_gthan_eq_int(var_cont* A, var_cont* B) +{ + var_cont* var = var_new(G_INT); + int AV = var_data_get_G_INT(A); + int BV = var_data_get_G_INT(B); + + int S = (AV >= BV); + + var_data_int* data = var_data_alloc_G_INT(S); + + var_set(var, data, G_INT); + + return var; + +} +var_cont* var_gthan_eq(var_cont* A, var_cont* B) +{ + var_cont* rv; + + ASSERT(( A->type == B->type ), "Inconsistent Types\n"); + if (A->type == G_INT && B->type == G_INT) + { + rv = var_gthan_eq_int(A, B); + } else + if (A->type == G_FLOAT && B->type == G_FLOAT) + { + rv = var_gthan_eq_float(A, B); + } + + N_ASSERT(rv, "var_gthan\n"); + + return rv; +} + +var_cont* var_lthan_eq_float(var_cont* A, var_cont* B) +{ + var_cont* var = var_new(G_INT); + double AV = var_data_get_G_FLOAT(A); + double BV = var_data_get_G_FLOAT(B); + + int S = (AV <= BV); + + var_data_int* data = var_data_alloc_G_INT(S); + + var_set(var, data, G_INT); + 
+ return var; +} +var_cont* var_lthan_eq_int(var_cont* A, var_cont* B) +{ + var_cont* var = var_new(G_INT); + int AV = var_data_get_G_INT(A); + int BV = var_data_get_G_INT(B); + + int S = (AV <= BV); + + var_data_int* data = var_data_alloc_G_INT(S); + + var_set(var, data, G_INT); + + return var; +} +var_cont* var_lthan_eq(var_cont* A, var_cont* B) +{ + var_cont* rv; + + ASSERT(( A->type == B->type ), "Inconsistent Types\n"); + if (A->type == G_INT && B->type == G_INT) + { + rv = var_lthan_eq_int(A, B); + } else + if (A->type == G_FLOAT && B->type == G_FLOAT) + { + rv = var_lthan_eq_float(A, B); + } + + N_ASSERT(rv, "var_lthan\n"); + + return rv; +} + + + var_cont* var_eq_float(var_cont* A, var_cont* B) { var_cont* var = var_new(G_INT);