language

Some fool's attempt at an interpreted language
Log | Files | Refs | README

parser.py (12797B)


      1 from lexer import *
      2 from memonic import *
      3 from bytecode import *
      4 
      5 class Parser():
      6 	def __init__(self, file_name):
      7 		self.splitters = [
      8 			":",
      9 			";",
     10 			"\(",
     11 			"\)",
     12 			"\[",
     13 			"\]",
     14 			"{",
     15 			"}",
     16 			",",
     17 			"\+",
     18 			"\-",
     19 			"\*",
     20 			"\/",
     21 			"\t",
     22 			" "
     23 		]
     24 		self.end_statements = [
     25 			":",
     26 			";",
     27 			"{",
     28 			"}"
     29 		]
     30 
     31 		self.known_tokens = [
     32 			"return",
     33 			"print",
     34 			"if",
     35 			"else",
     36 			"for",
     37 			"while",
     38 			"func",
     39 			"class",
     40 			"var"
     41 		]
     42 
     43 		self.TYPE_VOID = 0
     44 		self.TYPE_ADDR = 1
     45 		self.TYPE_TYPE = 2
     46 		self.TYPE_PLIST = 3
     47 		self.TYPE_FUNC = 4
     48 		self.TYPE_CLASS = 5
     49 		self.TYPE_OBJECT = 6
     50 		self.TYPE_INT = 7
     51 		self.TYPE_FLOAT = 8
     52 		self.TYPE_CHAR = 9
     53 		self.TYPE_STRING = 10
     54 		self.TYPE_STATIC_ARRAY = 11
     55 		self.TYPE_ARRAY = 12
     56 		self.TYPE_HASHTABLE = 13
     57 		self.TYPE_STACK = 14
     58 
     59 		self.defined_types = [
     60 			"void",
     61 			"_addr",
     62 			"_type",
     63 			"_plist",
     64 			"func",
     65 			"class",
     66 			"object",
     67 			"int",
     68 			"float",
     69 			"char",
     70 			"string",
     71 			"s_array",
     72 			"array",
     73 			"hashtable",
     74 			"stack"
     75 		]
     76 
     77 	# Defines what integers look like
     78 		self.int_def       = AtomicSymbol("^[0-9]+$")
     79 	# Regular expression for encapsulating text in `"`, simply
     80 		self.str_def       = AtomicSymbol("^\0+")
     81 	# Defines what type names exists
     82 		self.type_def      = InclusiveSymbol(self.defined_types)
     83 	# Defines what reserved names exists
     84 		self.label_def     = ExclusiveSymbol([self.int_def]     +
     85 		                                     [self.str_def]     +
     86 		                                     self.known_tokens   )
     87 	# Defines the parameter list defintion
     88 		self.paramlist_def = GroupingSymbol( [
     89 		                                      AtomicSymbol("\("),
     90 		                                      AtomicSymbol("\)")
     91 		                                     ] )
     92 	# Defines the expression definition
     93 		self.expr_def = PolySymbol( [
     94 		                             self.label_def,
     95 		                             self.int_def,
     96 		                             AtomicSymbol("\("),
     97 		                             AtomicSymbol("\)"),
     98 		                             AtomicSymbol("\+"),
     99 		                             AtomicSymbol("\-"),
    100 		                             AtomicSymbol("\*"),
    101 		                             AtomicSymbol("\/"),
    102 		                             AtomicSymbol("\>"),
    103 		                             AtomicSymbol("\<"),
    104 		                             AtomicSymbol("=\<"),
    105 		                             AtomicSymbol("\>="),
    106 		                             AtomicSymbol("=="),
    107 		                             AtomicSymbol("\""),
    108 		                             AtomicSymbol("'"),
    109 		                             AtomicSymbol("\0")
    110 		                            ], terminator=[
    111 		                                           AtomicSymbol(";"),
    112 		                                           AtomicSymbol(":")
    113 		                                          ])
    114 
    115 		self.statement_endebug = Statement(
    116 			"endebug",
    117 			expression=[
    118 				AtomicSymbol("DEBUG"),
    119 				AtomicSymbol(";")
    120 			],
    121 			init=(lambda x: [x.op(OP_DEBUG)])
    122 		)
    123 
    124 		self.statement_include = Statement(
    125 			"include",
    126 			expression=[
    127 				AtomicSymbol("include"),
    128 				self.label_def,
    129 				AtomicSymbol(";")
    130 			],
    131 			init=(lambda x: [])
    132 		)
    133 
    134 		self.statement_codeblock_begin = Statement(
    135 			"codeblock_begin",
    136 			expression=[
    137 				AtomicSymbol("{")
    138 			],
    139 			init=(lambda x: [x.push_directives()])
    140 		)
    141 
    142 		self.statement_codeblock_end = Statement(
    143 			"codeblock_end",
    144 			expression=[
    145 				AtomicSymbol("}")
    146 			],
    147 			init=(lambda x: [x.pop_directives()])
    148 		)
    149 
    150 		self.statement_return = Statement(
    151 			"return",
    152 			expression=[
    153 				AtomicSymbol("return"),
    154 				self.expr_def,
    155 				AtomicSymbol(";")
    156 			],
    157 			init=(lambda x: [
    158 			                 x.eval_expr(1),
    159 			                 x.op(OP_STV),
    160 			                 x.op(0x00),
    161 			                 x.op(0x00),
    162 			                 x.op(0x00)
    163 			                ])
    164 		)
    165 
    166 		self.statement_print = Statement(
    167 			"print",
    168 			expression=[
    169 				AtomicSymbol("print"),
    170 				self.expr_def,
    171 				AtomicSymbol(";")
    172 			],
    173 			init=(lambda x: [
    174 			                 x.eval_expr(1),
    175 			                 x.op(OP_PRINT)
    176 			                ])
    177 		)
    178 
    179 		self.statement_if = Statement(
    180 			"if",
    181 			expression=[
    182 				AtomicSymbol("if"),
    183 				self.expr_def,
    184 				AtomicSymbol(":")
    185 			],
    186 			init=(lambda x: [
    187 			                 x.eval_expr(1),
    188 			                 x.op(OP_IFDO),
    189 			                 x.add_directive(lambda x: [x.op(OP_DONE)],
    190 			                                 cond=(
    191 			                 lambda x: x.nxt(1)[0].name in ["else", "else_if"]))
    192 			                ])
    193 		)
    194 
    195 		self.statement_else_if = Statement(
    196 			"else_if",
    197 			expression=[
    198 				AtomicSymbol("else"),
    199 				AtomicSymbol("if"),
    200 				self.expr_def,
    201 				AtomicSymbol(":")
    202 			],
    203 			init=(lambda x: [
    204 			                 x.op(OP_ELSE),
    205 			                 x.eval_expr(2),
    206 			                 x.op(OP_IFDO),
    207 			                 x.add_directive(lambda x: [x.op(OP_DONE)],
    208 			                                 cond=(
    209 			                 lambda x: x.nxt(1)[0].name in ["else", "else_if"]))
    210 			                ])
    211 		)
    212 
    213 		self.statement_else = Statement(
    214 			"else",
    215 			expression=[
    216 				AtomicSymbol("else"),
    217 				AtomicSymbol(":")
    218 			],
    219 			init=(lambda x: [
    220 			                 x.op(OP_ELSE),
    221 			                 x.add_directive(lambda x: [x.op(OP_DONE)])
    222 			                ])
    223 		)
    224 
    225 		self.statement_for = Statement(
    226 			"for",
    227 			expression=[
    228 				AtomicSymbol("for"),
    229 				self.expr_def,
    230 				AtomicSymbol(":")
    231 			],
    232 			init=(lambda x: [
    233 			                 ForLoop(x.eval_expr(1))
    234 			                ])
    235 		)
    236 
    237 		self.statement_while = Statement(
    238 			"while",
    239 			expression=[
    240 				AtomicSymbol("while"),
    241 				self.expr_def,
    242 				AtomicSymbol(":")
    243 			],
    244 			init=(lambda x: [
    245 			                 x.op(OP_STARTL),
    246 			                 x.eval_expr(1),
    247 			                 x.op(OP_CLOOP),
    248 			                 x.add_directive(lambda x: [x.op(OP_ENDL)])
    249 			                ])
    250 		)
    251 
    252 		self.statement_func = Statement(
    253 			"function",
    254 			expression=[
    255 				AtomicSymbol("func"),
    256 				self.label_def,
    257 				self.paramlist_def,
    258 				AtomicSymbol("-"),
    259 				AtomicSymbol(">"),
    260 				self.label_def,
    261 				AtomicSymbol(":")
    262 			],
    263 			init=(
    264 				lambda x: [
    265 				           x.new_name(1),
    266 				           x.inc_scope(),
    267 				           FunctionDef(x.eval_label(1),
    268 				                       x.eval_param(2),
    269 				                       x.eval_type(5)),
    270 				           x.add_directive(lambda x: [x.op(OP_RETURN),
    271 				                                      x.dec_scope()])
    272 				          ])
    273 		)
    274 
    275 		self.statement_proc = Statement(
    276 			"procedure",
    277 			expression=[
    278 				AtomicSymbol("func"),
    279 				self.label_def,
    280 				AtomicSymbol("-"),
    281 				AtomicSymbol(">"),
    282 				self.label_def,
    283 				AtomicSymbol(":")
    284 			],
    285 			init=(
    286 				lambda x: [
    287 				           x.new_name(1),
    288 				           x.inc_scope(),
    289 				           FunctionDef(x.eval_label(1),
    290 				                       None,
    291 				                       x.eval_type(4)),
    292 				           x.add_directive(lambda x: [x.op(OP_RETURN),
    293 				                                      x.dec_scope()])
    294 				          ])
    295 		)
    296 
    297 		self.statement_class = Statement(
    298 			"class",
    299 			expression=[
    300 				AtomicSymbol("class"),
    301 				self.label_def,
    302 				self.paramlist_def,
    303 				AtomicSymbol(":")
    304 			],
    305 			onMatch=(lambda x, y: x.add_objectType(y[1])),
    306 			init=(lambda x: [
    307 			                 x.new_name(1),
    308 			                 x.ns_persist(1),
    309 			                 ClassDef(x.eval_label(1),
    310 			                          x.eval_param(2)),
    311 			                 x.add_directive(lambda x: [x.ns_save(),
    312 			                                            x.op(OP_ENDCLASS)])
    313 			                ])
    314 		)
    315 
    316 		self.statement_pless_class = Statement(
    317 			"paramless_class",
    318 			expression=[
    319 				AtomicSymbol("class"),
    320 				self.label_def,
    321 				AtomicSymbol(":")
    322 			],
    323 			onMatch=(lambda x, y: x.add_objectType(y[1])),
    324 			init=(lambda x: [
    325 			                 x.new_name(1),
    326 			                 x.ns_persist(1),
    327 			                 ClassDef(x.eval_label(1),
    328 			                          None),
    329 			                 x.add_directive(lambda x: [x.ns_save(),
    330 			                                            x.op(OP_ENDCLASS)])
    331 			                ])
    332 		)
    333 
    334 		self.statement_new = Statement(
    335 			"new",
    336 			expression=[
    337 				AtomicSymbol("var"),
    338 				self.label_def,
    339 				AtomicSymbol("="),
    340 				AtomicSymbol("new"),
    341 				self.label_def,
    342 				self.paramlist_def,
    343 				AtomicSymbol(";")
    344 			],
    345 			init=(lambda x: [
    346 			                 x.new_name(1),
    347 			                 x.ns_copy(1, 4),
    348 			                 NewClass(x.eval_label(1),
    349 			                          x.eval_label(4),
    350 			                          x.eval_args(5))
    351 			                ])
    352 		)
    353 
    354 		self.statement_unbound_new = Statement(
    355 			"unbound_new",
    356 			expression=[
    357 				self.label_def,
    358 				AtomicSymbol("="),
    359 				AtomicSymbol("new"),
    360 				self.label_def,
    361 				self.paramlist_def,
    362 				AtomicSymbol(";")
    363 			],
    364 			init=(lambda x: [
    365 			                 x.ns_copy(0, 3),
    366 			                 NewClass(x.eval_label(0),
    367 			                          x.eval_label(3),
    368 			                          x.eval_args(4))
    369 			                ])
    370 		)
    371 
    372 		self.statement_inst = Statement(
    373 			"instantiation",
    374 			expression=[
    375 				self.label_def,
    376 				self.label_def,
    377 				AtomicSymbol("="),
    378 				self.expr_def,
    379 				AtomicSymbol(";")
    380 			],
    381 			init=(lambda x: [
    382 			                 x.new_name(1),
    383 			                 x.ns_copy(1, 0),
    384 			                 VariableNew(x.eval_label(1),
    385 			                             x.eval_type(0)),
    386 			                 VariableAssignment(x.eval_label(1),
    387 			                                    x.eval_expr(3))
    388 			                ])
    389 		)
    390 
    391 		self.statement_declare = Statement(
    392 			"declare",
    393 			expression=[
    394 				AtomicSymbol("var"),
    395 				self.label_def,
    396 				AtomicSymbol("as"),
    397 				self.label_def,
    398 				AtomicSymbol(";")
    399 			],
    400 			init=(lambda x: [
    401 			                 x.new_name(1),
    402 			                 x.ns_copy(1, 3),
    403 			                 VariableNew(x.eval_label(1),
    404 			                             x.eval_type(3))
    405 			                ])
    406 		)
    407 
    408 		self.statement_assign = Statement(
    409 			"assignment",
    410 			expression=[
    411 				self.label_def,
    412 				AtomicSymbol("="),
    413 				self.expr_def,
    414 				AtomicSymbol(";")
    415 			],
    416 			init=(lambda x: [
    417 			                 VariableAssignment(x.eval_label(0),
    418 			                                    x.eval_expr(2))
    419 			                ])
    420 		)
    421 
    422 
    423 		self.statement_expression = Statement(
    424 			"expression",
    425 			expression=[
    426 				self.expr_def,
    427 				AtomicSymbol(";")
    428 			],
    429 			init=(lambda x: [x.eval_expr(0)])
    430 		)
    431 
    432 		self.active_tokens = [
    433 			self.statement_endebug,
    434 			self.statement_include,
    435 			self.statement_codeblock_begin,
    436 			self.statement_codeblock_end,
    437 			self.statement_return,
    438 			self.statement_print,
    439 			self.statement_if,
    440 			self.statement_else_if,
    441 			self.statement_else,
    442 			self.statement_for,
    443 			self.statement_while,
    444 			self.statement_func,
    445 			self.statement_proc,
    446 			self.statement_pless_class,
    447 			self.statement_class,
    448 			self.statement_declare,
    449 			self.statement_new,
    450 			self.statement_unbound_new,
    451 			self.statement_inst,
    452 			self.statement_assign,
    453 			self.statement_expression
    454 		]
    455 
    456 		# This is the definition for what is a symbol
    457 		self.symbols = Tokenizer(self.splitters, self.end_statements)
    458 
    459 		self.currentObjectType = ""
    460 
    461 		# This holds the program.
    462 		data = ""
    463 		# Open the file, and replace every newline with a space.
    464 		with open(file_name, 'r') as program:
    465 		    data=program.read().replace('\n', '')
    466 
    467 		# Now, parse our program into statements
    468 		self.lines = self.symbols.generate_statements(data)
    469 
    470 	def add_objectType(self, name):
    471 		self.currentObjectType = name
    472 		pass	
    473 
    474 	def get_statements(self):
    475 		rv = []
    476 		# Go through our program statement by statement and get line numbers
    477 		for num, l in enumerate(self.lines):
    478 		#   Now, for each active token we have defined, step through and find
    479 		#   which lines match which tokens
    480 		#
    481 		#   NOTE: The order of active_tokens is of most-probable to match
    482 		#         to least-probable to match
    483 			fail = True
    484 			for a in self.active_tokens:
    485 				r = a.match(l)
    486 				# If the line matches the token,
    487 				if r:
    488 					if a.onMatch != None:
    489 						print("running on match thing")
    490 						a.onMatch(self, l)
    491 
    492 					fail = False
    493 					#   If the token is an "incude" token, include the file
    494 					#   specified by the "include" directive
    495 					if a.name == "include":
    496 						# Create a new Parser instance pointing to the file
    497 						# specified by the first arguement
    498 						t = Parser(r[1][0] + ".ti")
    499 						l = t.get_statements()
    500 						rv.extend(l)
    501 					else:
    502 					#   We are a normal token, return the type of token
    503 					#   along with the list of matching tokens
    504 						rv.append([a,r,[]])
    505 						print("{}: {}\t{}".format(str(num).rjust(4),
    506 						                            a.name.rjust(15), r))
    507 					break
    508 
    509 			if fail:
    510 				print("Error, Line #{0}".format(num))
    511 				print("{}".format(l))
    512 				rv = False;
    513 				break;
    514 
    515 		return rv
    516