lalr.om (27975B)
1 // Aho, A.V., Sethi, R. and Ullman, J.D. (2007) Compilers: Principles, techniques, and Tools. Reading, Mass: Addison-Wesley. 2 3 struct lalr_ctx { 4 x: int; 5 } 6 7 struct lalr_item { 8 node: rbnode; 9 queue_next: *lalr_item; 10 prod: int; 11 alt: int; 12 point: int; 13 look: *rbnode; 14 } 15 16 struct lalr_itemset { 17 node: rbnode; 18 id: int; 19 items: *rbnode; 20 closure: *rbnode; 21 goto_next: *lalr_itemset; 22 goto_out: *rbnode; 23 mark: int; 24 } 25 26 struct lalr_point { 27 node: rbnode; 28 term: int; 29 id: int; 30 name: *byte; 31 out: *lalr_itemset; 32 } 33 34 struct lalr_alt { 35 prod: int; 36 id: int; 37 point: *lalr_point; 38 point_len: int; 39 point_cap: int; 40 } 41 42 struct lalr_prod { 43 node: rbnode; 44 id: int; 45 name: *byte; 46 47 use: *lalr_itemset; 48 epsilon: int; 49 first: *rbnode; 50 51 in_queue: int; 52 queue_next: *lalr_prod; 53 54 alt: **lalr_alt; 55 alt_len: int; 56 alt_cap: int; 57 58 action: *byte; 59 pos: int; 60 } 61 62 struct lalr_compiler { 63 a: *alloc; 64 c: *compiler; 65 err: *file; 66 67 prod_name: *rbnode; 68 prod: **lalr_prod; 69 prod_len: int; 70 prod_cap: int; 71 72 sets: *rbnode; 73 root: *lalr_itemset; 74 goto_queue: *lalr_itemset; 75 76 term_len: int; 77 sets_len: int; 78 79 act_table_len: int; 80 act_table: *int; 81 goto_table_len: int; 82 goto_table: *int; 83 } 84 85 func lalr_compiler(c: *compiler, pn: *node, err: *file) { 86 var _lc: lalr_compiler; 87 var lc: *lalr_compiler; 88 lc = &_lc; 89 lc.a = c.a; 90 lc.c = c; 91 lc.err = err; 92 lalr_rules(lc, pn); 93 lalr_items(lc); 94 lalr_first(lc); 95 lalr_lookahead(lc); 96 lalr_codegen(lc); 97 lalr_output(lc); 98 } 99 100 func lalr_mkprod(lc: *lalr_compiler): *lalr_prod { 101 var p: *lalr_prod; 102 ensure_arr(lc.a, (&lc.prod) as **void, &lc.prod_cap, lc.prod_len + 1, sizeof(*lc.prod)); 103 p = alloc(lc.a, sizeof(*p)) as *lalr_prod; 104 p.id = lc.prod_len; 105 p.use = lalr_mkitemset(lc); 106 lc.prod[lc.prod_len] = p; 107 lc.prod_len = lc.prod_len + 1; 108 return p; 109 } 110 111 func lalr_lookup(lc: *lalr_compiler, name: *byte, new: *lalr_prod): *lalr_prod { 112 var p: *rbnode; 113 var link: **rbnode; 114 var d: *lalr_prod; 115 var dir: int; 116 117 p = nil; 118 link = &lc.prod_name; 119 loop { 120 d = (*link) as *lalr_prod; 121 if !d { 122 break; 123 } 124 125 dir = strcmp(d.name, name); 126 if dir == 0 { 127 break; 128 } else if dir < 0 { 129 link = &d.node.left; 130 p = &d.node; 131 } else { 132 link = &d.node.right; 133 p = &d.node; 134 } 135 } 136 137 if !new { 138 return d; 139 } 140 141 if d { 142 die("duplicate rule"); 143 } 144 145 new.name = name; 146 rb_link(&lc.prod_name, link, p, &new.node); 147 return new; 148 } 149 150 func lalr_rules(lc: *lalr_compiler, pn: *node) { 151 var rule: *node; 152 var root: *lalr_prod; 153 var p: *lalr_prod; 154 var a: *lalr_alt; 155 var i: int; 156 157 root = lalr_mkprod(lc); 158 159 rule = pn.a; 160 loop { 161 if !rule { 162 break; 163 } 164 p = lalr_mkprod(lc); 165 lalr_lookup(lc, rule.a.a.s, p); 166 rule = rule.b; 167 } 168 169 i = 1; 170 rule = pn.a; 171 loop { 172 if !rule { 173 break; 174 } 175 p = lc.prod[i]; 176 lalr_pattern(lc, p, rule.a.b); 177 rule = rule.b; 178 i = i + 1; 179 } 180 181 a = lalr_mkalt(lc, root); 182 lalr_alt_nonterminal(lc, a, lc.prod[1]); 183 } 184 185 func lalr_pattern(lc: *lalr_compiler, p: *lalr_prod, pn: *node) { 186 var a: *lalr_alt; 187 loop { 188 if !pn { 189 break; 190 } 191 a = lalr_mkalt(lc, p); 192 lalr_alt(lc, a, pn.a); 193 pn = pn.b; 194 } 195 } 196 197 func lalr_alt(lc: *lalr_compiler, a: *lalr_alt, pn: *node) { 198 loop { 199 if !pn { 200 break; 201 } 202 lalr_primary(lc, a, pn.a); 203 pn = pn.b; 204 } 205 } 206 207 func lalr_primary(lc: *lalr_compiler, a: *lalr_alt, pn: *node) { 208 var v: *decl; 209 var s: *lalr_prod; 210 211 if pn.kind == N_IDENT { 212 s = lalr_lookup(lc, pn.s, nil); 213 if s { 214 lalr_alt_nonterminal(lc, a, s); 215 return; 216 } 217 218 v = find(lc.c, pn.s, nil, 0); 219 if !v || !v.enum_defined { 220 fputs(lc.err, "name: "); 221 fputs(lc.err, pn.s); 222 fputs(lc.err, "\n"); 223 die("undefined terminal"); 224 } 225 226 lalr_alt_terminal(lc, a, pn.s, v.enum_value); 227 return; 228 } 229 230 lalr_action(lc, a, pn); 231 } 232 233 func lalr_action(lc: *lalr_compiler, a: *lalr_alt, pn: *node) { 234 var s: *byte; 235 var b: *node; 236 var d: *node; 237 var n: *node; 238 var c: *node; 239 var j: int; 240 var ch: byte; 241 var x: int; 242 var prod: *lalr_prod; 243 var alt: *lalr_alt; 244 s = alloc(lc.a, 64); 245 n = mknode0(lc.c.p, N_IDENT); 246 memcpy(s, "lalr_action_", 12); 247 prod = lalr_mkprod(lc); 248 prod.pos = a.point_len; 249 prod.action = s; 250 int2str(&s[12], prod.id); 251 n.s = s; 252 c = mknode0(lc.c.p, N_IDENT); 253 c.s = "ctx"; 254 b = mknode0(lc.c.p, N_IDENT); 255 b.s = "lex"; 256 b = mknode1(lc.c.p, N_PTRTYPE, b); 257 b = mknode(lc.c.p, N_ARGDECL, c, b); 258 b = mknode1(lc.c.p, N_ARGLIST, b); 259 b = mknode(lc.c.p, N_FUNCTYPE, b, nil); 260 d = mknode(lc.c.p, N_FUNCDECL, n, b); 261 n = mknode(lc.c.p, N_FUNC, d, pn); 262 defun(lc.c, n); 263 alt = lalr_mkalt(lc, prod); 264 lalr_alt_nonterminal(lc, a, prod); 265 } 266 267 func lalr_mkalt(lc: *lalr_compiler, p: *lalr_prod): *lalr_alt { 268 var a: *lalr_alt; 269 a = alloc(lc.a, sizeof(*a)) as *lalr_alt; 270 a.id = p.alt_len; 271 a.prod = p.id; 272 ensure_arr(lc.a, (&p.alt) as **void, &p.alt_cap, p.alt_len + 1, sizeof(*p.alt)); 273 p.alt[p.alt_len] = a; 274 p.alt_len = p.alt_len + 1; 275 return a; 276 } 277 278 func lalr_alt_terminal(lc: *lalr_compiler, a: *lalr_alt, name: *byte, id: int) { 279 var i: int; 280 ensure_arr(lc.a, (&a.point) as **void, &a.point_cap, a.point_len + 1, sizeof(*a.point)); 281 i = a.point_len; 282 a.point_len = a.point_len + 1; 283 a.point[i].term = 1; 284 a.point[i].id = id; 285 a.point[i].name = name; 286 if id < 0 { 287 die("invalid terminal"); 288 } 289 if lc.term_len <= id { 290 lc.term_len = id + 1; 291 } 292 } 293 294 func lalr_alt_nonterminal(lc: *lalr_compiler, a: *lalr_alt, s: *lalr_prod) { 295 var use: lalr_item; 296 var i: int; 297 ensure_arr(lc.a, (&a.point) as **void, &a.point_cap, a.point_len + 1, sizeof(*a.point)); 298 i = a.point_len; 299 a.point_len = a.point_len + 1; 300 a.point[i].term = 0; 301 a.point[i].id = s.id; 302 a.point[i].name = s.name; 303 use.prod = a.prod; 304 use.alt = 0; 305 use.point = 0; 306 lalr_assoc(lc, s.use, &use); 307 } 308 309 func lalr_show(lc: *lalr_compiler) { 310 var i: int; 311 var j: int; 312 var k: int; 313 var p: *lalr_prod; 314 var a: *lalr_alt; 315 var t: *lalr_point; 316 var out: *file; 317 318 i = 0; 319 loop { 320 if i == lc.prod_len { 321 break; 322 } 323 324 p = lc.prod[i]; 325 326 j = 0; 327 loop { 328 if j == p.alt_len { 329 break; 330 } 331 332 a = p.alt[j]; 333 334 if p.name { 335 fputs(out, p.name); 336 } else { 337 fputd(out, i); 338 } 339 340 fputs(out, " -> "); 341 342 k = 0; 343 loop { 344 if k == a.point_len { 345 break; 346 } 347 348 t = &a.point[k]; 349 if t.name { 350 fputs(out, t.name); 351 } else { 352 fputd(out, t.id); 353 } 354 fputs(out, " "); 355 356 k = k + 1; 357 } 358 359 fputs(out, "\n"); 360 361 j = j + 1; 362 } 363 364 i = i + 1; 365 } 366 } 367 368 func lalr_mkitemset(lc: *lalr_compiler): *lalr_itemset { 369 var i: *lalr_itemset; 370 i = alloc(lc.a, sizeof(*i)) as *lalr_itemset; 371 return i; 372 } 373 374 func lalr_itemcmp(a: *lalr_item, b: *lalr_item): int { 375 if a.prod < b.prod { 376 return -1; 377 } else if a.prod > b.prod { 378 return 1; 379 } else if a.alt < b.alt { 380 return -1; 381 } else if a.alt > b.alt { 382 return 1; 383 } else if a.point < b.point { 384 return -1; 385 } else if a.point > b.point { 386 return 1; 387 } 388 return 0; 389 } 390 391 func lalr_itemsetcmp(a: *lalr_itemset, b: *lalr_itemset): int { 392 var x: *lalr_item; 393 var y: *lalr_item; 394 var dir: int; 395 x = rb_first(a.items) as *lalr_item; 396 y = rb_first(b.items) as *lalr_item; 397 loop { 398 if !x && !y { 399 return 0; 400 } else if !x { 401 return -1; 402 } else if !y { 403 return 1; 404 } 405 dir = lalr_itemcmp(x, y); 406 if dir != 0 { 407 return dir; 408 } 409 x = rb_next(&x.node) as *lalr_item; 410 y = rb_next(&y.node) as *lalr_item; 411 } 412 } 413 414 func lalr_assoc(lc: *lalr_compiler, i: *lalr_itemset, t: *lalr_item): *lalr_item { 415 var p: *rbnode; 416 var link: **rbnode; 417 var d: *lalr_item; 418 var dir: int; 419 p = nil; 420 link = &i.items; 421 loop { 422 d = (*link) as *lalr_item; 423 if !d { 424 break; 425 } 426 dir = lalr_itemcmp(t, d); 427 if dir < 0 { 428 link = &d.node.left; 429 p = &d.node; 430 } else if dir > 0 { 431 link = &d.node.right; 432 p = &d.node; 433 } else { 434 return nil; 435 } 436 } 437 d = alloc(lc.a, sizeof(*d)) as *lalr_item; 438 d.prod = t.prod; 439 d.alt = t.alt; 440 d.point = t.point; 441 rb_link(&i.items, link, p, &d.node); 442 return d; 443 } 444 445 func lalr_items(lc: *lalr_compiler) { 446 var i: *lalr_itemset; 447 var init_state: lalr_item; 448 449 i = lalr_mkitemset(lc); 450 lalr_assoc(lc, i, &init_state); 451 lc.root = lalr_memoset(lc, i); 452 453 loop { 454 i = lc.goto_queue; 455 if !i { 456 break; 457 } 458 lc.goto_queue = i.goto_next; 459 lalr_goto(lc, i); 460 } 461 } 462 463 func lalr_goto(lc: *lalr_compiler, i: *lalr_itemset) { 464 var t: *lalr_item; 465 var a: *lalr_item; 466 var seen: *lalr_itemset; 467 var queue: *lalr_item; 468 var prod: *lalr_prod; 469 var alt: *lalr_alt; 470 var sym: *lalr_point; 471 var r: lalr_item; 472 var j: int; 473 474 seen = lalr_mkitemset(lc); 475 476 // Queue up the items from the current state 477 t = rb_first(i.items) as *lalr_item; 478 loop { 479 if !t { 480 break; 481 } 482 a = lalr_assoc(lc, seen, t); 483 if a { 484 a.queue_next = queue; 485 queue = a; 486 } 487 t = rb_next(&t.node) as *lalr_item; 488 } 489 490 loop { 491 t = queue; 492 if !t { 493 break; 494 } 495 queue = t.queue_next; 496 497 alt = lc.prod[t.prod].alt[t.alt]; 498 if t.point == alt.point_len { 499 continue; 500 } 501 502 // That symbol is the edge in the graph 503 sym = &alt.point[t.point]; 504 505 r.prod = t.prod; 506 r.alt = t.alt; 507 r.point = t.point + 1; 508 509 lalr_assoc(lc, seen, &r); 510 511 lalr_linkout(lc, &i.goto_out, sym, &r); 512 if sym.term { 513 continue; 514 } 515 516 prod = lc.prod[sym.id]; 517 518 j = 0; 519 loop { 520 if j == prod.alt_len { 521 break; 522 } 523 524 r.prod = sym.id; 525 r.alt = j; 526 r.point = 0; 527 528 a = lalr_assoc(lc, seen, &r); 529 if a { 530 a.queue_next = queue; 531 queue = a; 532 } 533 534 j = j + 1; 535 } 536 } 537 538 sym = rb_first(i.goto_out) as *lalr_point; 539 loop { 540 if !sym { 541 break; 542 } 543 sym.out = lalr_memoset(lc, sym.out); 544 sym = rb_next(&sym.node) as *lalr_point; 545 } 546 } 547 548 func lalr_pointcmp(a: *lalr_point, b: *lalr_point): int { 549 if a.term < b.term { 550 return -1; 551 } else if a.term > b.term { 552 return 1; 553 } else if a.id < b.id { 554 return -1; 555 } else if a.id > b.id { 556 return 1; 557 } 558 return 0; 559 } 560 561 func lalr_linkout(lc: *lalr_compiler, out: **rbnode, sym: *lalr_point, t: *lalr_item) { 562 var p: *rbnode; 563 var link: **rbnode; 564 var d: *lalr_point; 565 var dir: int; 566 p = nil; 567 link = out; 568 loop { 569 d = (*link) as *lalr_point; 570 if !d { 571 break; 572 } 573 dir = lalr_pointcmp(sym, d); 574 if dir < 0 { 575 link = &d.node.left; 576 p = &d.node; 577 } else if dir > 0 { 578 link = &d.node.right; 579 p = &d.node; 580 } else { 581 lalr_assoc(lc, d.out, t); 582 return; 583 } 584 } 585 d = alloc(lc.a, sizeof(*d)) as *lalr_point; 586 d.term = sym.term; 587 d.id = sym.id; 588 d.name = sym.name; 589 d.out = lalr_mkitemset(lc); 590 rb_link(out, link, p, &d.node); 591 lalr_assoc(lc, d.out, t); 592 } 593 594 func lalr_memoset(lc: *lalr_compiler, i: *lalr_itemset): *lalr_itemset { 595 var p: *rbnode; 596 var link: **rbnode; 597 var d: *lalr_itemset; 598 var dir: int; 599 p = nil; 600 link = &lc.sets; 601 loop { 602 d = (*link) as *lalr_itemset; 603 if !d { 604 break; 605 } 606 dir = lalr_itemsetcmp(i, d); 607 if dir < 0 { 608 link = &d.node.left; 609 p = &d.node; 610 } else if dir > 0 { 611 link = &d.node.right; 612 p = &d.node; 613 } else { 614 return d; 615 } 616 } 617 rb_link(&lc.sets, link, p, &i.node); 618 i.goto_next = lc.goto_queue; 619 i.id = lc.sets_len; 620 lc.sets_len = lc.sets_len + 1; 621 lc.goto_queue = i; 622 return i; 623 } 624 625 func lalr_showlr(lc: *lalr_compiler) { 626 lalr_showlr2(lc, lc.root); 627 } 628 629 func lalr_showlr2(lc: *lalr_compiler, i: *lalr_itemset): int { 630 var t: *lalr_item; 631 var sym: *lalr_point; 632 var prod: *lalr_prod; 633 var alt: *lalr_alt; 634 var l: *lalr_point; 635 var j: int; 636 var out: *file; 637 var k: int; 638 639 if i.mark { 640 return i.mark; 641 } 642 i.mark = 1; 643 644 fputd(out, i.id); 645 fputs(out, "{"); 646 647 t = rb_first(i.items) as *lalr_item; 648 loop { 649 if !t { 650 break; 651 } 652 653 prod = lc.prod[t.prod]; 654 alt = prod.alt[t.alt]; 655 656 fputs(out, "["); 657 if prod.name { 658 fputs(out, prod.name); 659 } else { 660 fputd(out, prod.id); 661 } 662 663 fputs(out, "->"); 664 k = 0; 665 loop { 666 if k == t.point { 667 fputs(out, "!"); 668 } 669 if k == alt.point_len { 670 break; 671 } 672 sym = &alt.point[k]; 673 if sym.name { 674 fputs(out, sym.name); 675 } else { 676 fputd(out, sym.id); 677 } 678 k = k + 1; 679 if k != alt.point_len && k != t.point { 680 fputc(out, ' '); 681 } 682 } 683 fputs(out, "]"); 684 685 if t.point == alt.point_len { 686 fputs(out, "{"); 687 l = rb_first(t.look) as *lalr_point; 688 loop { 689 if !l { 690 break; 691 } 692 fputs(out, l.name); 693 l = rb_next(&l.node) as *lalr_point; 694 if l { 695 fputs(out, " "); 696 } 697 } 698 fputs(out, "}"); 699 } 700 701 t = rb_next(&t.node) as *lalr_item; 702 703 if t { 704 fputs(out, ","); 705 } 706 } 707 708 fputs(out, "};\n"); 709 710 sym = rb_first(i.goto_out) as *lalr_point; 711 loop { 712 if !sym { 713 break; 714 } 715 j = lalr_showlr2(lc, sym.out); 716 fputd(out, i.id); 717 fputs(out, "->"); 718 fputd(out, j); 719 fputs(out, "["); 720 if sym.name { 721 fputs(out, sym.name); 722 } else { 723 fputd(out, sym.id); 724 } 725 fputs(out, "]"); 726 fputs(out, ";\n"); 727 sym = rb_next(&sym.node) as *lalr_point; 728 } 729 730 return i.id; 731 } 732 733 struct lalr_look { 734 next: *lalr_look; 735 item: lalr_item; 736 itemset: *lalr_itemset; 737 look: **rbnode; 738 _look: *rbnode; 739 } 740 741 func lalr_lookahead(lc: *lalr_compiler) { 742 var queue: *lalr_look; 743 var w: *lalr_look; 744 var q: *lalr_look; 745 var r: lalr_point; 746 var sym: *lalr_point; 747 var rest_sym: *lalr_point; 748 var alt: *lalr_alt; 749 var prod: *lalr_prod; 750 var rest_prod: *lalr_prod; 751 var i: *lalr_itemset; 752 var j: int; 753 var k: int; 754 var epsilon: int; 755 756 r.term = 1; 757 r.id = lc.term_len; 758 r.name = "$"; 759 lc.term_len = lc.term_len + 1; 760 761 w = alloc(lc.a, sizeof(*w)) as *lalr_look; 762 w.item.prod = 0; 763 w.item.alt = 0; 764 w.item.point = 0; 765 w.itemset = lc.root; 766 w.look = &w._look; 767 w._look = &r.node; 768 queue = w; 769 770 loop { 771 w = queue; 772 if !w { 773 break; 774 } 775 776 if !lalr_unionlook(lc, w) { 777 queue = w.next; 778 continue; 779 } 780 781 alt = lc.prod[w.item.prod].alt[w.item.alt]; 782 if w.item.point == alt.point_len { 783 queue = w.next; 784 continue; 785 } 786 787 i = w.itemset; 788 789 sym = &alt.point[w.item.point]; 790 lalr_findout(lc, w, sym); 791 if sym.term { 792 continue; 793 } 794 prod = lc.prod[sym.id]; 795 796 epsilon = 1; 797 j = 0; 798 loop { 799 if j == prod.alt_len { 800 break; 801 } 802 alt = prod.alt[j]; 803 804 k = w.item.point; 805 loop { 806 if k == lc.prod[w.item.prod].alt[w.item.alt].point_len { 807 break; 808 } 809 rest_sym = &lc.prod[w.item.prod].alt[w.item.alt].point[k]; 810 811 if rest_sym.term { 812 q = alloc(lc.a, sizeof(*q)) as *lalr_look; 813 q.item.prod = sym.id; 814 q.item.alt = j; 815 q.item.point = 0; 816 q.itemset = i; 817 q.look = &q._look; 818 q._look = &rest_sym.node; 819 q.next = queue; 820 queue = q; 821 epsilon = 0; 822 break; 823 } else { 824 rest_prod = lc.prod[rest_sym.id]; 825 q = alloc(lc.a, sizeof(*q)) as *lalr_look; 826 q.item.prod = sym.id; 827 q.item.alt = j; 828 q.item.point = 0; 829 q.itemset = i; 830 q.look = &rest_prod.first; 831 q.next = queue; 832 queue = q; 833 if !rest_prod.epsilon { 834 epsilon = 0; 835 break; 836 } 837 } 838 839 k = k + 1; 840 } 841 842 j = j + 1; 843 } 844 845 if !epsilon { 846 continue; 847 } 848 849 j = 0; 850 loop { 851 if j == prod.alt_len { 852 break; 853 } 854 alt = prod.alt[j]; 855 856 q = alloc(lc.a, sizeof(*q)) as *lalr_look; 857 q.item.prod = sym.id; 858 q.item.alt = j; 859 q.item.point = 0; 860 q.itemset = i; 861 q.look = w.look; 862 q.next = queue; 863 queue = q; 864 865 j = j + 1; 866 } 867 } 868 } 869 870 func lalr_unionlook(lc: *lalr_compiler, w: *lalr_look): int { 871 var s: *rbnode; 872 var t: *lalr_item; 873 var ret: int; 874 875 t = lalr_finditem(lc, w.itemset, &w.item); 876 877 s = rb_first(*w.look); 878 loop { 879 if !s { 880 break; 881 } 882 883 if lalr_addlook(lc, &t.look, s as *lalr_point) { 884 ret = 1; 885 } 886 887 s = rb_next(s); 888 } 889 890 w.look = &t.look; 891 return ret; 892 } 893 894 func lalr_addlook(lc: *lalr_compiler, out: **rbnode, sym: *lalr_point): int { 895 var p: *rbnode; 896 var link: **rbnode; 897 var d: *lalr_point; 898 var dir: int; 899 p = nil; 900 link = out; 901 loop { 902 d = (*link) as *lalr_point; 903 if !d { 904 break; 905 } 906 dir = lalr_pointcmp(sym, d); 907 if dir < 0 { 908 link = &d.node.left; 909 p = &d.node; 910 } else if dir > 0 { 911 link = &d.node.right; 912 p = &d.node; 913 } else { 914 return 0; 915 } 916 } 917 d = alloc(lc.a, sizeof(*d)) as *lalr_point; 918 d.term = sym.term; 919 d.id = sym.id; 920 d.name = sym.name; 921 rb_link(out, link, p, &d.node); 922 return 1; 923 } 924 925 func lalr_findout(lc: *lalr_compiler, w: *lalr_look, sym: *lalr_point) { 926 var d: *lalr_point; 927 var r: lalr_item; 928 var dir: int; 929 d = w.itemset.goto_out as *lalr_point; 930 loop { 931 dir = lalr_pointcmp(sym, d); 932 if dir < 0 { 933 d = d.node.left as *lalr_point; 934 } else if dir > 0 { 935 d = d.node.right as *lalr_point; 936 } else { 937 break; 938 } 939 } 940 w.itemset = d.out; 941 w.item.point = w.item.point + 1; 942 } 943 944 func lalr_finditem(lc: *lalr_compiler, itemset: *lalr_itemset, item: *lalr_item): *lalr_item { 945 var p: *rbnode; 946 var link: **rbnode; 947 var d: *lalr_item; 948 var dir: int; 949 950 d = (*itemset).items as *lalr_item; 951 loop { 952 if !d { 953 break; 954 } 955 dir = lalr_itemcmp(item, d); 956 if dir < 0 { 957 d = d.node.left as *lalr_item; 958 } else if dir > 0 { 959 d = d.node.right as *lalr_item; 960 } else { 961 return d; 962 } 963 } 964 965 p = nil; 966 link = &(*itemset).closure; 967 loop { 968 d = (*link) as *lalr_item; 969 if !d { 970 break; 971 } 972 dir = lalr_itemcmp(item, d); 973 if dir < 0 { 974 link = &d.node.left; 975 p = &d.node; 976 } else if dir > 0 { 977 link = &d.node.right; 978 p = &d.node; 979 } else { 980 return d; 981 } 982 } 983 984 d = alloc(lc.a, sizeof(*d)) as *lalr_item; 985 d.prod = item.prod; 986 d.alt = item.alt; 987 d.point = item.point; 988 rb_link(&(*itemset).closure, link, p, &d.node); 989 990 return d; 991 } 992 993 func lalr_first(lc: *lalr_compiler) { 994 var use: *lalr_item; 995 var p: *lalr_prod; 996 var queue: *lalr_prod; 997 var prod: *lalr_prod; 998 var alt: *lalr_alt; 999 var sym: *lalr_point; 1000 var ref: *lalr_prod; 1001 var i: int; 1002 var j: int; 1003 var k: int; 1004 1005 i = 0; 1006 loop { 1007 if i == lc.prod_len { 1008 break; 1009 } 1010 1011 j = 0; 1012 loop { 1013 if j == lc.prod[i].alt_len { 1014 break; 1015 } 1016 1017 if lc.prod[i].alt[j].point_len == 0 { 1018 lc.prod[i].epsilon = 1; 1019 j = j + 1; 1020 continue; 1021 } 1022 1023 if lc.prod[i].alt[j].point[0].term { 1024 lalr_addlook(lc, &lc.prod[i].first, &lc.prod[i].alt[j].point[0]); 1025 } 1026 1027 j = j + 1; 1028 } 1029 1030 lc.prod[i].in_queue = 1; 1031 lc.prod[i].queue_next = queue; 1032 queue = lc.prod[i]; 1033 1034 i = i + 1; 1035 } 1036 1037 loop { 1038 p = queue; 1039 if !p { 1040 break; 1041 } 1042 p.in_queue = 0; 1043 queue = p.queue_next; 1044 1045 use = rb_first(p.use.items) as *lalr_item; 1046 loop { 1047 if !use { 1048 break; 1049 } 1050 i = use.prod; 1051 prod = lc.prod[i]; 1052 1053 j = 0; 1054 loop { 1055 if j == prod.alt_len { 1056 break; 1057 } 1058 alt = prod.alt[j]; 1059 1060 k = 0; 1061 loop { 1062 if k == alt.point_len { 1063 if !prod.epsilon { 1064 prod.epsilon = 1; 1065 if !prod.in_queue { 1066 prod.in_queue = 1; 1067 prod.queue_next = queue; 1068 queue = prod; 1069 } 1070 } 1071 break; 1072 } 1073 sym = &alt.point[k]; 1074 1075 if sym.term { 1076 if lalr_add_first(lc, &prod.first, sym) { 1077 if !prod.in_queue { 1078 prod.in_queue = 1; 1079 prod.queue_next = queue; 1080 queue = prod; 1081 } 1082 } 1083 break; 1084 } else { 1085 ref = lc.prod[sym.id]; 1086 1087 if lalr_union_first(lc, &prod.first, ref.first) { 1088 if !prod.in_queue { 1089 prod.in_queue = 1; 1090 prod.queue_next = queue; 1091 queue = prod; 1092 } 1093 } 1094 1095 if !ref.epsilon { 1096 break; 1097 } 1098 } 1099 1100 k = k + 1; 1101 } 1102 1103 j = j + 1; 1104 } 1105 1106 use = rb_next(&use.node) as *lalr_item; 1107 } 1108 } 1109 } 1110 1111 func lalr_add_first(lc: *lalr_compiler, out: **rbnode, sym: *lalr_point): int { 1112 var p: *rbnode; 1113 var link: **rbnode; 1114 var d: *lalr_point; 1115 var dir: int; 1116 p = nil; 1117 link = out; 1118 loop { 1119 d = (*link) as *lalr_point; 1120 if !d { 1121 break; 1122 } 1123 dir = lalr_pointcmp(sym, d); 1124 if dir < 0 { 1125 link = &d.node.left; 1126 p = &d.node; 1127 } else if dir > 0 { 1128 link = &d.node.right; 1129 p = &d.node; 1130 } else { 1131 return 0; 1132 } 1133 } 1134 d = alloc(lc.a, sizeof(*d)) as *lalr_point; 1135 d.term = sym.term; 1136 d.id = sym.id; 1137 d.name = sym.name; 1138 rb_link(out, link, p, &d.node); 1139 return 1; 1140 } 1141 1142 func lalr_union_first(lc: *lalr_compiler, out: **rbnode, src: *rbnode): int { 1143 var s: *rbnode; 1144 var t: *lalr_item; 1145 var ret: int; 1146 1147 s = rb_first(src); 1148 loop { 1149 if !s { 1150 break; 1151 } 1152 1153 if lalr_add_first(lc, out, s as *lalr_point) { 1154 ret = 1; 1155 } 1156 1157 s = rb_next(s); 1158 } 1159 1160 return ret; 1161 } 1162 1163 func lalr_show_first(lc: *lalr_compiler) { 1164 var i: int; 1165 var p: *lalr_prod; 1166 var d: *lalr_point; 1167 var out: *file; 1168 1169 i = 0; 1170 loop { 1171 if i == lc.prod_len { 1172 break; 1173 } 1174 p = lc.prod[i]; 1175 1176 if p.name { 1177 fputs(out, p.name); 1178 } else { 1179 fputd(out, i); 1180 } 1181 1182 fputs(out, "->{"); 1183 1184 if p.epsilon { 1185 fputs(out, "e"); 1186 1187 if p.first { 1188 fputs(out, " "); 1189 } 1190 } 1191 1192 d = rb_first(p.first) as *lalr_point; 1193 loop { 1194 if !d { 1195 break; 1196 } 1197 1198 if d.name { 1199 fputs(out, d.name); 1200 } else { 1201 fputd(out, d.id); 1202 } 1203 1204 d = rb_next(&d.node) as *lalr_point; 1205 1206 if d { 1207 fputs(out, " "); 1208 } 1209 } 1210 1211 fputs(out, "}\n"); 1212 1213 i = i + 1; 1214 } 1215 } 1216 1217 func lalr_codegen(lc: *lalr_compiler) { 1218 var i: *lalr_itemset; 1219 var t: *lalr_item; 1220 var next_tree: *rbnode; 1221 var len: int; 1222 var sym: *lalr_point; 1223 var action: int; 1224 1225 lc.act_table_len = sizeof(*lc.act_table) * lc.sets_len * lc.term_len; 1226 lc.act_table = alloc(lc.a, lc.act_table_len) as *int; 1227 lc.goto_table_len = sizeof(*lc.goto_table) * lc.sets_len * lc.prod_len; 1228 lc.goto_table = alloc(lc.a, lc.goto_table_len) as *int; 1229 1230 i = rb_first(lc.sets) as *lalr_itemset; 1231 loop { 1232 if !i { 1233 break; 1234 } 1235 1236 sym = rb_first(i.goto_out) as *lalr_point; 1237 loop { 1238 if !sym { 1239 break; 1240 } 1241 1242 if sym.term { 1243 lc.act_table[i.id * lc.term_len + sym.id] = (sym.out.id << 8) + 's'; 1244 } else { 1245 lc.goto_table[i.id * lc.prod_len + sym.id] = sym.out.id; 1246 } 1247 1248 sym = rb_next(&sym.node) as *lalr_point; 1249 } 1250 1251 t = rb_first(i.items) as *lalr_item; 1252 next_tree = i.closure; 1253 loop { 1254 if !t { 1255 if next_tree { 1256 t = rb_first(next_tree) as *lalr_item; 1257 next_tree = nil; 1258 continue; 1259 } 1260 break; 1261 } 1262 1263 len = lc.prod[t.prod].alt[t.alt].point_len; 1264 if t.point != len { 1265 t = rb_next(&t.node) as *lalr_item; 1266 continue; 1267 } 1268 1269 sym = rb_first(t.look) as *lalr_point; 1270 loop { 1271 if !sym { 1272 break; 1273 } 1274 1275 if lc.act_table[i.id * lc.term_len + sym.id] { 1276 fputc(nil, lc.act_table[i.id * lc.term_len + sym.id]); 1277 die("/reduce conflict"); 1278 } 1279 1280 if t.prod == 0 && t.alt == 0 && t.point == 1 { 1281 lc.act_table[i.id * lc.term_len + sym.id] = 'a'; 1282 } else { 1283 lc.act_table[i.id * lc.term_len + sym.id] = (len << 32) + (t.prod << 8) + 'r'; 1284 } 1285 1286 sym = rb_next(&sym.node) as *lalr_point; 1287 } 1288 1289 t = rb_next(&t.node) as *lalr_item; 1290 } 1291 1292 i = rb_next(&i.node) as *lalr_itemset; 1293 } 1294 1295 if lc.prod[1].epsilon { 1296 lc.act_table[0] = 'a'; 1297 } 1298 } 1299 1300 func lalr_output(lc: *lalr_compiler) { 1301 var ic: *irfunc; 1302 var o: *irop; 1303 var ret_type: *type; 1304 var func_type: *type; 1305 1306 ret_type = mktype0(lc.c, TY_INT); 1307 ret_type = mktype1(lc.c, TY_PTR, ret_type); 1308 1309 func_type = mktype2(lc.c, TY_FUNC, ret_type, nil); 1310 1311 ic = mkirfunc(lc.c, "get_act_table"); 1312 o = mkirstr(ic, lc.act_table as *byte, lc.act_table_len); 1313 irreturn(ic, o); 1314 define_ir_func(lc.c, ic, func_type); 1315 1316 ic = mkirfunc(lc.c, "get_goto_table"); 1317 o = mkirstr(ic, lc.goto_table as *byte, lc.goto_table_len); 1318 irreturn(ic, o); 1319 define_ir_func(lc.c, ic, func_type); 1320 1321 ret_type = mktype0(lc.c, TY_INT); 1322 1323 func_type = mktype2(lc.c, TY_FUNC, ret_type, nil); 1324 1325 ic = mkirfunc(lc.c, "get_prod_len"); 1326 o = mkirconst(ic, lc.prod_len); 1327 irreturn(ic, o); 1328 define_ir_func(lc.c, ic, func_type); 1329 1330 ic = mkirfunc(lc.c, "get_term_len"); 1331 o = mkirconst(ic, lc.term_len); 1332 irreturn(ic, o); 1333 define_ir_func(lc.c, ic, func_type); 1334 1335 lalr_compile_get_call_table(lc); 1336 } 1337 1338 func lalr_compile_get_call_table(lc: *lalr_compiler) { 1339 var arg1_type: *type; 1340 var args_type: *type; 1341 var ret_type: *type; 1342 var func_type: *type; 1343 var ic: *irfunc; 1344 var o: *irop; 1345 var a: *irop; 1346 var b: *irop; 1347 var this: *irblock; 1348 var next: *irblock; 1349 var prod: *lalr_prod; 1350 var ident: *node; 1351 var i: int; 1352 var v: *irvar; 1353 var pos_table: *int; 1354 var pos_table_len: int; 1355 1356 arg1_type = mktype0(lc.c, TY_VOID); 1357 arg1_type = mktype1(lc.c, TY_PTR, arg1_type); 1358 args_type = mktype1(lc.c, TY_ARG, arg1_type); 1359 ret_type = mktype0(lc.c, TY_VOID); 1360 func_type = mktype2(lc.c, TY_FUNC, ret_type, args_type); 1361 arg1_type = mktype1(lc.c, TY_PTR, func_type); 1362 args_type = mktype1(lc.c, TY_ARG, arg1_type); 1363 ret_type = mktype0(lc.c, TY_VOID); 1364 func_type = mktype2(lc.c, TY_FUNC, ret_type, args_type); 1365 1366 ic = mkirfunc(lc.c, "get_call_table"); 1367 1368 v = iraddarg(ic, "t", arg1_type); 1369 1370 pos_table_len = lc.prod_len * sizeof(*pos_table); 1371 pos_table = alloc(lc.a, pos_table_len) as *int; 1372 1373 i = 0; 1374 loop { 1375 if i == lc.prod_len { 1376 break; 1377 } 1378 prod = lc.prod[i]; 1379 pos_table[i] = prod.pos; 1380 if prod.action { 1381 a = mkirop(ic, IOP_VAR, nil, nil); 1382 a.n = v.n; 1383 b = mkirconst(ic, 8 * i); 1384 a = mkirop(ic, IOP_ADD, a, b); 1385 a = mkirop(ic, IOP_LOAD, a, nil); 1386 b = mkirfuncref(ic, prod.action); 1387 o = mkirop(ic, IOP_STORE, a, b); 1388 o.t = arg1_type.val; 1389 iraddop(ic, o); 1390 } 1391 i = i + 1; 1392 } 1393 1394 o = mkirconst(ic, 0); 1395 irreturn(ic, o); 1396 1397 define_ir_func(lc.c, ic, func_type); 1398 1399 ret_type = mktype0(lc.c, TY_INT); 1400 ret_type = mktype1(lc.c, TY_PTR, ret_type); 1401 1402 func_type = mktype2(lc.c, TY_FUNC, ret_type, nil); 1403 1404 ic = mkirfunc(lc.c, "get_pos_table"); 1405 o = mkirstr(ic, pos_table as *byte, pos_table_len); 1406 irreturn(ic, o); 1407 define_ir_func(lc.c, ic, func_type); 1408 } 1409 1410 func get_act_table(): *int; 1411 func get_goto_table(): *int; 1412 func get_call_table(t: *func(ctx: *void)); 1413 func get_pos_table(): *int; 1414 func get_prod_len(): int; 1415 func get_term_len(): int; 1416 1417 func dolalr(c: *compiler, name: *byte, out: *file): *lex_sem { 1418 var act_table: *int; 1419 var goto_table: *int; 1420 var tok: int; 1421 var state: int; 1422 var term_len: int; 1423 var prod_len: int; 1424 var l: *lex; 1425 var fd: int; 1426 var act: int; 1427 var act_out: int; 1428 var act_len: int; 1429 var stack: *int; 1430 var stack_len: int; 1431 var stack_cap: int; 1432 var pos_table: *int; 1433 var call_table: *func(ctx: *lex); 1434 1435 l = setup_lex(c); 1436 fd = open(name, 0, 0); 1437 if fd < 0 { 1438 die("open failed"); 1439 } 1440 open_lex(l, fd); 1441 1442 call_table = alloc(l.a, sizeof(*call_table) * get_prod_len()) as *func(ctx: *lex); 1443 1444 act_table = get_act_table(); 1445 goto_table = get_goto_table(); 1446 get_call_table(call_table as *func(ctx: *void)); 1447 pos_table = get_pos_table(); 1448 term_len = get_term_len(); 1449 prod_len = get_prod_len(); 1450 1451 c.filename = name; 1452 c.lineno = 1; 1453 c.colno = 1; 1454 1455 state = 0; 1456 l.val = &l._val; 1457 tok = gettok(l); 1458 loop { 1459 if tok < 0 && l.eof { 1460 tok = term_len - 1; 1461 } else if tok < 0 || tok >= term_len { 1462 die("invalid token"); 1463 } 1464 1465 act = act_table[state * term_len + tok] & 255; 1466 act_out = (act_table[state * term_len + tok] >> 8) & 0xffffff; 1467 act_len = act_table[state * term_len + tok] >> 32; 1468 1469 if act == 's' { 1470 ensure_arr(l.a, (&l.stack) as **void, &l.stack_cap, l.stack_len + 1, sizeof(*l.stack)); 1471 memcpy((&l.stack[l.stack_len]) as *byte, l.val as *byte, sizeof(*l.val)); 1472 l.stack_len = l.stack_len + 1; 1473 1474 ensure_arr(l.a, (&stack) as **void, &stack_cap, stack_len + 1, sizeof(*stack)); 1475 stack[stack_len] = state; 1476 stack_len = stack_len + 1; 1477 1478 state = act_out; 1479 memset(l.val as *byte, 0, sizeof(*l.val)); 1480 tok = gettok(l); 1481 } else if act == 'r' { 1482 ensure_arr(l.a, (&l.stack) as **void, &l.stack_cap, l.stack_len + 1, sizeof(*l.stack)); 1483 if act_len != 0 { 1484 if act_len > stack_len { 1485 die("stack underflow"); 1486 } 1487 stack_len = stack_len - act_len; 1488 state = stack[stack_len]; 1489 memcpy((&l.stack[l.stack_len - act_len]) as *byte, (&l.stack[l.stack_len - 1]) as *byte, sizeof(*l.stack)); 1490 } else { 1491 l.sem_len = pos_table[act_out]; 1492 l.sem = &l.stack[l.stack_len - l.sem_len]; 1493 if call_table[act_out] { 1494 l.val = &l.stack[l.stack_len]; 1495 if l.sem_len != 0 && l.sem[0].n { 1496 l.c.lineno = l.sem[0].n.lineno; 1497 l.c.colno = l.sem[0].n.colno; 1498 } else { 1499 l.c.lineno = l.start_lineno; 1500 l.c.colno = l.start_colno; 1501 } 1502 call_table[act_out](l); 1503 l.val = &l._val; 1504 } else { 1505 memset((&l.stack[l.stack_len]) as *byte, 0, sizeof(*l.val)); 1506 } 1507 } 1508 l.stack_len = l.stack_len - act_len + 1; 1509 1510 ensure_arr(l.a, (&stack) as **void, &stack_cap, stack_len + 1, sizeof(*stack)); 1511 stack[stack_len] = state; 1512 stack_len = stack_len + 1; 1513 1514 state = goto_table[state * prod_len + act_out]; 1515 } else if act == 'a' { 1516 if tok != term_len - 1 { 1517 die("accept not at eof"); 1518 } 1519 return &l.stack[0]; 1520 } else { 1521 fputs(nil, "on "); 1522 fputs(nil, name); 1523 fputs(nil, ":"); 1524 fputd(nil, l.start_lineno); 1525 fputs(nil, ":"); 1526 fputd(nil, l.start_colno); 1527 die(" syntax error"); 1528 } 1529 } 1530 }