parser.py (12797B)
from lexer import *
from memonic import *
from bytecode import *


class Parser:
    """Turn a source file into a list of matched statements.

    The constructor builds the grammar (symbol definitions plus one
    ``Statement`` per language construct), reads ``file_name`` and asks the
    tokenizer to split it into statements. ``get_statements`` then matches
    every statement against the grammar.

    ``Statement``, the ``*Symbol`` classes, ``Tokenizer``, the ``OP_*``
    opcodes and the ``ForLoop``/``FunctionDef``/... helpers are not defined
    here; they come from the star imports above.
    """

    def __init__(self, file_name):
        """Build the grammar and tokenize the program stored in *file_name*.

        Args:
            file_name: Path of the source file to parse.

        Raises:
            OSError: If *file_name* cannot be opened (raised by ``open``).
        """
        # Patterns handed to the Tokenizer as token separators. Raw strings
        # keep the backslashes without relying on invalid escape sequences.
        self.splitters = [
            ":",
            ";",
            r"\(",
            r"\)",
            r"\[",
            r"\]",
            "{",
            "}",
            ",",
            r"\+",
            r"\-",
            r"\*",
            r"\/",
            "\t",
            " ",
        ]
        # Tokens handed to the Tokenizer as statement terminators.
        self.end_statements = [
            ":",
            ";",
            "{",
            "}",
        ]

        # Reserved words; excluded from what `label_def` accepts.
        self.known_tokens = [
            "return",
            "print",
            "if",
            "else",
            "for",
            "while",
            "func",
            "class",
            "var",
        ]

        # Numeric type identifiers. Each value equals the index of the
        # corresponding name in `self.defined_types` below.
        self.TYPE_VOID = 0
        self.TYPE_ADDR = 1
        self.TYPE_TYPE = 2
        self.TYPE_PLIST = 3
        self.TYPE_FUNC = 4
        self.TYPE_CLASS = 5
        self.TYPE_OBJECT = 6
        self.TYPE_INT = 7
        self.TYPE_FLOAT = 8
        self.TYPE_CHAR = 9
        self.TYPE_STRING = 10
        self.TYPE_STATIC_ARRAY = 11
        self.TYPE_ARRAY = 12
        self.TYPE_HASHTABLE = 13
        self.TYPE_STACK = 14

        self.defined_types = [
            "void",
            "_addr",
            "_type",
            "_plist",
            "func",
            "class",
            "object",
            "int",
            "float",
            "char",
            "string",
            "s_array",
            "array",
            "hashtable",
            "stack",
        ]

        # Defines what integers look like
        self.int_def = AtomicSymbol("^[0-9]+$")
        # Defines what string tokens look like.
        # NOTE(review): the pattern matches a run of NUL characters, not
        # quotes -- presumably the tokenizer substitutes NUL placeholders for
        # string literals; confirm against the lexer.
        self.str_def = AtomicSymbol("^\0+")
        # Defines what type names exist
        self.type_def = InclusiveSymbol(self.defined_types)
        # Defines what a label is: anything that is not an integer, a string
        # token, or a reserved word
        self.label_def = ExclusiveSymbol([self.int_def] +
                                         [self.str_def] +
                                         self.known_tokens)
        # Defines the parameter list definition
        self.paramlist_def = GroupingSymbol([
            AtomicSymbol(r"\("),
            AtomicSymbol(r"\)"),
        ])
        # Defines the expression definition
        self.expr_def = PolySymbol([
            self.label_def,
            self.int_def,
            AtomicSymbol(r"\("),
            AtomicSymbol(r"\)"),
            AtomicSymbol(r"\+"),
            AtomicSymbol(r"\-"),
            AtomicSymbol(r"\*"),
            AtomicSymbol(r"\/"),
            AtomicSymbol(r"\>"),
            AtomicSymbol(r"\<"),
            AtomicSymbol(r"=\<"),
            AtomicSymbol(r"\>="),
            AtomicSymbol("=="),
            AtomicSymbol("\""),
            AtomicSymbol("'"),
            AtomicSymbol("\0"),
        ], terminator=[
            AtomicSymbol(";"),
            AtomicSymbol(":"),
        ])

        # ------------------------------------------------------------------
        # Statement definitions.
        #
        # The integer arguments passed to eval_expr/eval_label/eval_type/...
        # inside each `init` lambda appear to be positions in that
        # statement's `expression` list (TODO confirm against Statement).
        # ------------------------------------------------------------------

        self.statement_endebug = Statement(
            "endebug",
            expression=[
                AtomicSymbol("DEBUG"),
                AtomicSymbol(";"),
            ],
            init=(lambda x: [x.op(OP_DEBUG)])
        )

        # `init` is empty: includes are expanded in get_statements() instead.
        self.statement_include = Statement(
            "include",
            expression=[
                AtomicSymbol("include"),
                self.label_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [])
        )

        self.statement_codeblock_begin = Statement(
            "codeblock_begin",
            expression=[
                AtomicSymbol("{"),
            ],
            init=(lambda x: [x.push_directives()])
        )

        self.statement_codeblock_end = Statement(
            "codeblock_end",
            expression=[
                AtomicSymbol("}"),
            ],
            init=(lambda x: [x.pop_directives()])
        )

        self.statement_return = Statement(
            "return",
            expression=[
                AtomicSymbol("return"),
                self.expr_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.eval_expr(1),
                x.op(OP_STV),
                x.op(0x00),
                x.op(0x00),
                x.op(0x00),
            ])
        )

        self.statement_print = Statement(
            "print",
            expression=[
                AtomicSymbol("print"),
                self.expr_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.eval_expr(1),
                x.op(OP_PRINT),
            ])
        )

        self.statement_if = Statement(
            "if",
            expression=[
                AtomicSymbol("if"),
                self.expr_def,
                AtomicSymbol(":"),
            ],
            init=(lambda x: [
                x.eval_expr(1),
                x.op(OP_IFDO),
                # The directive's condition inspects the name of the
                # following statement ("else" / "else_if").
                x.add_directive(lambda x: [x.op(OP_DONE)],
                                cond=(
                    lambda x: x.nxt(1)[0].name in ["else", "else_if"])),
            ])
        )

        self.statement_else_if = Statement(
            "else_if",
            expression=[
                AtomicSymbol("else"),
                AtomicSymbol("if"),
                self.expr_def,
                AtomicSymbol(":"),
            ],
            init=(lambda x: [
                x.op(OP_ELSE),
                x.eval_expr(2),
                x.op(OP_IFDO),
                x.add_directive(lambda x: [x.op(OP_DONE)],
                                cond=(
                    lambda x: x.nxt(1)[0].name in ["else", "else_if"])),
            ])
        )

        self.statement_else = Statement(
            "else",
            expression=[
                AtomicSymbol("else"),
                AtomicSymbol(":"),
            ],
            init=(lambda x: [
                x.op(OP_ELSE),
                x.add_directive(lambda x: [x.op(OP_DONE)]),
            ])
        )

        self.statement_for = Statement(
            "for",
            expression=[
                AtomicSymbol("for"),
                self.expr_def,
                AtomicSymbol(":"),
            ],
            init=(lambda x: [
                ForLoop(x.eval_expr(1)),
            ])
        )

        self.statement_while = Statement(
            "while",
            expression=[
                AtomicSymbol("while"),
                self.expr_def,
                AtomicSymbol(":"),
            ],
            init=(lambda x: [
                x.op(OP_STARTL),
                x.eval_expr(1),
                x.op(OP_CLOOP),
                x.add_directive(lambda x: [x.op(OP_ENDL)]),
            ])
        )

        # func <name> (<params>) -> <type> :
        self.statement_func = Statement(
            "function",
            expression=[
                AtomicSymbol("func"),
                self.label_def,
                self.paramlist_def,
                AtomicSymbol("-"),
                AtomicSymbol(">"),
                self.label_def,
                AtomicSymbol(":"),
            ],
            init=(
                lambda x: [
                    x.new_name(1),
                    x.inc_scope(),
                    FunctionDef(x.eval_label(1),
                                x.eval_param(2),
                                x.eval_type(5)),
                    x.add_directive(lambda x: [x.op(OP_RETURN),
                                               x.dec_scope()]),
                ])
        )

        # func <name> -> <type> :   (no parameter list)
        self.statement_proc = Statement(
            "procedure",
            expression=[
                AtomicSymbol("func"),
                self.label_def,
                AtomicSymbol("-"),
                AtomicSymbol(">"),
                self.label_def,
                AtomicSymbol(":"),
            ],
            init=(
                lambda x: [
                    x.new_name(1),
                    x.inc_scope(),
                    FunctionDef(x.eval_label(1),
                                None,
                                x.eval_type(4)),
                    x.add_directive(lambda x: [x.op(OP_RETURN),
                                               x.dec_scope()]),
                ])
        )

        # class <name> (<params>) :
        # onMatch receives (parser, matched line) -- see get_statements().
        self.statement_class = Statement(
            "class",
            expression=[
                AtomicSymbol("class"),
                self.label_def,
                self.paramlist_def,
                AtomicSymbol(":"),
            ],
            onMatch=(lambda x, y: x.add_objectType(y[1])),
            init=(lambda x: [
                x.new_name(1),
                x.ns_persist(1),
                ClassDef(x.eval_label(1),
                         x.eval_param(2)),
                x.add_directive(lambda x: [x.ns_save(),
                                           x.op(OP_ENDCLASS)]),
            ])
        )

        # class <name> :   (no parameter list)
        self.statement_pless_class = Statement(
            "paramless_class",
            expression=[
                AtomicSymbol("class"),
                self.label_def,
                AtomicSymbol(":"),
            ],
            onMatch=(lambda x, y: x.add_objectType(y[1])),
            init=(lambda x: [
                x.new_name(1),
                x.ns_persist(1),
                ClassDef(x.eval_label(1),
                         None),
                x.add_directive(lambda x: [x.ns_save(),
                                           x.op(OP_ENDCLASS)]),
            ])
        )

        # var <name> = new <class> (<args>) ;
        self.statement_new = Statement(
            "new",
            expression=[
                AtomicSymbol("var"),
                self.label_def,
                AtomicSymbol("="),
                AtomicSymbol("new"),
                self.label_def,
                self.paramlist_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.new_name(1),
                x.ns_copy(1, 4),
                NewClass(x.eval_label(1),
                         x.eval_label(4),
                         x.eval_args(5)),
            ])
        )

        # <name> = new <class> (<args>) ;   (no `var`, no new_name call)
        self.statement_unbound_new = Statement(
            "unbound_new",
            expression=[
                self.label_def,
                AtomicSymbol("="),
                AtomicSymbol("new"),
                self.label_def,
                self.paramlist_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.ns_copy(0, 3),
                NewClass(x.eval_label(0),
                         x.eval_label(3),
                         x.eval_args(4)),
            ])
        )

        # <type> <name> = <expr> ;
        self.statement_inst = Statement(
            "instantiation",
            expression=[
                self.label_def,
                self.label_def,
                AtomicSymbol("="),
                self.expr_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.new_name(1),
                x.ns_copy(1, 0),
                VariableNew(x.eval_label(1),
                            x.eval_type(0)),
                VariableAssignment(x.eval_label(1),
                                   x.eval_expr(3)),
            ])
        )

        # var <name> as <type> ;
        self.statement_declare = Statement(
            "declare",
            expression=[
                AtomicSymbol("var"),
                self.label_def,
                AtomicSymbol("as"),
                self.label_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                x.new_name(1),
                x.ns_copy(1, 3),
                VariableNew(x.eval_label(1),
                            x.eval_type(3)),
            ])
        )

        # <name> = <expr> ;
        self.statement_assign = Statement(
            "assignment",
            expression=[
                self.label_def,
                AtomicSymbol("="),
                self.expr_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [
                VariableAssignment(x.eval_label(0),
                                   x.eval_expr(2)),
            ])
        )

        # <expr> ;
        self.statement_expression = Statement(
            "expression",
            expression=[
                self.expr_def,
                AtomicSymbol(";"),
            ],
            init=(lambda x: [x.eval_expr(0)])
        )

        # Statements are tried in this order by get_statements(); the first
        # one that matches wins, so the order is significant.
        self.active_tokens = [
            self.statement_endebug,
            self.statement_include,
            self.statement_codeblock_begin,
            self.statement_codeblock_end,
            self.statement_return,
            self.statement_print,
            self.statement_if,
            self.statement_else_if,
            self.statement_else,
            self.statement_for,
            self.statement_while,
            self.statement_func,
            self.statement_proc,
            self.statement_pless_class,
            self.statement_class,
            self.statement_declare,
            self.statement_new,
            self.statement_unbound_new,
            self.statement_inst,
            self.statement_assign,
            self.statement_expression,
        ]

        # This is the definition for what is a symbol
        self.symbols = Tokenizer(self.splitters, self.end_statements)

        # Name recorded by the most recent class statement's onMatch hook.
        self.currentObjectType = ""

        # Read the program and replace every newline with a space, so that
        # tokens separated only by a line break are not fused together.
        with open(file_name, 'r') as program:
            data = program.read().replace('\n', ' ')

        # Now, parse our program into statements
        self.lines = self.symbols.generate_statements(data)

    def add_objectType(self, name):
        """Record *name* as the current object type."""
        self.currentObjectType = name

    def get_statements(self):
        """Match every tokenized statement against the active statements.

        Each statement in ``self.lines`` is tried against
        ``self.active_tokens`` in order and the first match wins. A matching
        ``include`` statement is replaced by the statements of the included
        ``<name>.ti`` file, parsed by a fresh ``Parser``.

        Returns:
            A list of ``[statement, match_result, []]`` entries, or ``False``
            if a statement (here or in an included file) matches nothing.
        """
        rv = []
        # Go through our program statement by statement and get line numbers
        for num, line in enumerate(self.lines):
            # NOTE: The order of active_tokens is of most-probable to match
            #       to least-probable to match
            matched = False
            for token in self.active_tokens:
                result = token.match(line)
                if not result:
                    continue

                if token.onMatch is not None:
                    print("running on match thing")
                    token.onMatch(self, line)

                matched = True
                if token.name == "include":
                    # Parse the file named by the first argument of the
                    # include statement and splice its statements in.
                    included = Parser(result[1][0] + ".ti").get_statements()
                    if included is False:
                        # The included file failed to parse; propagate the
                        # failure instead of crashing in extend(False).
                        return False
                    rv.extend(included)
                else:
                    # We are a normal token, return the type of token
                    # along with the list of matching tokens
                    rv.append([token, result, []])
                    print("{}: {}\t{}".format(str(num).rjust(4),
                                              token.name.rjust(15), result))
                break

            if not matched:
                print("Error, Line #{0}".format(num))
                print("{}".format(line))
                return False

        return rv