commit 926ecdedf8184453554bc0a2ae8a4588d25c0f00
parent 36d43fdc15a7021e3d8dfd67f098cffdc92c357f
Author: erai <erai@omiltem.net>
Date: Sat, 14 Sep 2024 12:10:36 -0400
Translate peg to code
Diffstat:
M | bufio.c | | | 12 | ++++++++++++ |
M | peg.c | | | 473 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
M | syscall.c | | | 4 | ++++ |
3 files changed, 461 insertions(+), 28 deletions(-)
diff --git a/bufio.c b/bufio.c
@@ -154,6 +154,18 @@ fputs(f: *file, s: *byte) {
}
}
+fputb(f: *file, s: *byte, n: int) {
+ var i: int;
+ i = 0;
+ loop {
+ if i >= n {
+ break;
+ }
+ fputc(f, s[i]:int);
+ i = i + 1;
+ }
+}
+
fseek(f: *file, off: int) {
f.r = 0;
f.w = 0;
diff --git a/peg.c b/peg.c
@@ -14,6 +14,7 @@ struct peg_op {
struct peg {
a: *alloc;
f: *file;
+ fout: *file;
src: *byte;
size: int;
pos: int;
@@ -27,6 +28,7 @@ struct peg {
nstack: **peg_node;
np: int;
ncap: int;
+ scratch: *byte;
}
enum {
@@ -693,37 +695,454 @@ construct(c: *peg): *peg_node {
}
}
-show(n: *peg_node) {
- fdputc(1, '(');
- fdputs(1, tag_to_str(n.tag));
- if n.child {
- n = n.child;
- loop {
- if !n {
- break;
+enum {
+ LOOK_NORMAL,
+ LOOK_NOT,
+ LOOK_AND,
+}
+
+decode_look(n: *peg_node): int {
+ var ret: int;
+
+ ret = LOOK_NORMAL;
+ if n.child.tag == P_lookop {
+ if n.child.str[0] == '!':byte {
+ ret = LOOK_NOT;
+ } else if n.child.str[0] == '&':byte {
+ ret = LOOK_AND;
+ }
+ }
+
+ return ret;
+}
+
+enum {
+ ZERO_OR_ONE,
+ EXACTLY_ONE,
+ ZERO_OR_MORE,
+ ONE_OR_MORE,
+}
+
+decode_count(n: *peg_node): int {
+ var ret: int;
+
+ ret = EXACTLY_ONE;
+ n = n.child;
+ loop {
+ if !n {
+ return ret;
+ }
+
+ if n.tag == P_countop {
+ if n.str[0] == '?':byte {
+ if ret == EXACTLY_ONE {
+ ret = ZERO_OR_ONE;
+ } else if ret == ONE_OR_MORE {
+ ret = ZERO_OR_MORE;
+ }
+ } else if n.str[0] == '*':byte {
+ ret = ZERO_OR_MORE;
+ } else if n.str[0] == '+':byte {
+ if ret == ZERO_OR_ONE {
+ ret = ZERO_OR_MORE;
+ } else if ret == EXACTLY_ONE {
+ ret = ONE_OR_MORE;
+ } else if ret == ZERO_OR_MORE {
+ ret = ZERO_OR_MORE;
+ }
+ } else {
+ die("invalid countop");
}
+ }
- fdputc(1, ' ');
- show(n);
+ n = n.next;
+ }
+}
- n = n.next;
+translate_literal(c: *peg, n: *peg_node) {
+ var i: int;
+ var len: int;
+ var ch: int;
+ var hex: *byte;
+
+ hex = "0123456789abcdef";
+
+ i = 1;
+ len = n.len - 1;
+ loop {
+ if i == len {
+ break;
+ }
+
+ ch = n.str[i]:int;
+
+ if ch < 32 || ch > 127 {
+ fputc(c.fout, '\\');
+ fputc(c.fout, 'x');
+ fputc(c.fout, hex[ch >> 4]:int);
+ fputc(c.fout, hex[ch & 15]:int);
+ } else {
+ fputc(c.fout, ch);
+ }
+
+ i = i + 1;
+ }
+}
+
+hexdig(c: byte): int {
+ var ch: int;
+
+ ch = c:int;
+
+ if ch >= '0' && ch <= '9' {
+ return ch - '0';
+ }
+
+ if ch >= 'a' && ch <= 'f' {
+ return ch - 'a' + 10;
+ }
+
+ if ch >= 'A' && ch <= 'F' {
+ return ch - 'A' + 10;
+ }
+
+ die("invalid hex digit");
+}
+
+parse_escape(s: *byte, i: *int, n: int): int {
+ var nc: int;
+
+ if *i == n {
+ die("invalid escape");
+ }
+
+ nc = s[*i]:int;
+ *i = *i + 1;
+
+ if nc == 't' {
+ return '\t';
+ } else if nc == 'r' {
+ return '\r';
+ } else if nc == 'n' {
+ return '\n';
+ } else if nc == '\\' {
+ return '\\';
+ } else if nc == '\'' {
+ return '\'';
+ } else if nc == '"' {
+ return '"';
+ } else if nc == '-' {
+ return '-';
+ } else if nc == '[' {
+ return '[';
+ } else if nc == ']' {
+ return ']';
+ } else if nc == 'x' {
+ if n - *i < 2 {
+ die("invalid escape");
}
+ nc = hexdig(s[*i]) * 16 + hexdig(s[*i + 1]);
+ *i = *i + 2;
+ return nc;
} else {
- fdputc(1, ' ');
- fdputc(1, '"');
- write(1, n.str, n.len);
- fdputc(1, '"');
+ die("invalid escape");
+ }
+}
+
+translate_charset(c: *peg, n: *peg_node) {
+ var i: int;
+ var len: int;
+ var ch: int;
+ var a: int;
+ var b: int;
+ var hex: *byte;
+
+ hex = "0123456789abcdef";
+
+ memset(c.scratch, 0, 256);
+
+ i = 1;
+ len = n.len - 1;
+ loop {
+ if i == len {
+ break;
+ }
+
+ ch = n.str[i]:int;
+ i = i + 1;
+
+ if ch == '\\' {
+ ch = parse_escape(n.str, &i, len);
+ }
+
+ if i < len && n.str[i] == '-':byte {
+ i = i + 1;
+
+ if i == len {
+ die("invalid range");
+ }
+
+ a = ch;
+
+ ch = n.str[i]:int;
+ i = i + 1;
+
+ if ch == '\\' {
+ ch = parse_escape(n.str, &i, len);
+ }
+
+ b = ch;
+
+ loop {
+ if a > b {
+ break;
+ }
+
+ c.scratch[a] = 1:byte;
+
+ a = a + 1;
+ }
+ } else {
+ c.scratch[ch] = 1:byte;
+ }
+ }
+
+ i = 1;
+ loop {
+ if i == 256 {
+ break;
+ }
+
+ if c.scratch[i] {
+ if i < 32 || i > 127 {
+ fputc(c.fout, '\\');
+ fputc(c.fout, 'x');
+ fputc(c.fout, hex[i >> 4]:int);
+ fputc(c.fout, hex[i & 15]:int);
+ } else {
+ fputc(c.fout, i);
+ }
+ }
+
+ i = i + 1;
+ }
+}
+
+translate_pattern(c: *peg, n: *peg_node) {
+ var count: int;
+ var look: int;
+ var d: *peg_node;
+
+ loop {
+ if n.tag == P_pattern {
+ d = n.child;
+ if !d.next {
+ translate_pattern(c, d);
+ } else {
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, d);
+ d = d.next;
+ loop {
+ if !d {
+ break;
+ }
+
+ fputs(c.fout, " if !ok { choice(c);\n");
+ translate_pattern(c, d);
+ fputs(c.fout, " }\n");
+
+ d = d.next;
+ }
+ fputs(c.fout, " if ok { commit(c); } else { fail(c); }\n");
+ }
+ } else if n.tag == P_alternative {
+ d = n.child;
+ translate_pattern(c, d);
+ d = d.next;
+ loop {
+ if !d {
+ break;
+ }
+
+ fputs(c.fout, " if ok {\n");
+ translate_pattern(c, d);
+ fputs(c.fout, " }\n");
+
+ d = d.next;
+ }
+ } else if n.tag == P_lookahead {
+ look = decode_look(n);
+ d = n.child;
+ if d.tag == P_lookop {
+ d = d.next;
+ }
+
+ if look == LOOK_AND {
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, d);
+ fputs(c.fout, " fail(c);\n");
+ } else if look == LOOK_NOT {
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, d);
+ fputs(c.fout, " if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }\n");
+ } else if look == LOOK_NORMAL {
+ translate_pattern(c, d);
+ } else {
+ die("invalid lookop");
+ }
+ } else if n.tag == P_suffix {
+ count = decode_count(n);
+ if count == ZERO_OR_ONE {
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, n.child);
+ fputs(c.fout, " if ok { commit(c); } else { ok = 1; }\n");
+ } else if count == EXACTLY_ONE {
+ translate_pattern(c, n.child);
+ } else if count == ZERO_OR_MORE {
+ fputs(c.fout, " loop {\n");
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, n.child);
+ fputs(c.fout, " if !ok { break }\n");
+ fputs(c.fout, " commit(c);\n");
+ fputs(c.fout, " }\n");
+ } else if count == ONE_OR_MORE {
+ translate_pattern(c, n.child);
+ fputs(c.fout, " if ok {\n");
+ fputs(c.fout, " loop {\n");
+ fputs(c.fout, " choice(c);\n");
+ translate_pattern(c, n.child);
+ fputs(c.fout, " if !ok { break }\n");
+ fputs(c.fout, " commit(c);\n");
+ fputs(c.fout, " }\n");
+ fputs(c.fout, " }\n");
+ } else {
+ die("invalid countop");
+ }
+ } else if n.tag == P_primary {
+ translate_pattern(c, n.child);
+ } else if n.tag == P_any {
+ fputs(c.fout, " ok = any(c);\n");
+ } else if n.tag == P_literal {
+ fputs(c.fout, " ok = literal(c, \"");
+ translate_literal(c, n);
+ fputs(c.fout, "\");\n");
+ } else if n.tag == P_class {
+ fputs(c.fout, " ok = charset(c, \"");
+ translate_charset(c, n);
+ fputs(c.fout, "\");\n");
+ } else if n.tag == P_call {
+ fputs(c.fout, " ok = p_");
+ fputb(c.fout, n.child.str, n.child.len);
+ fputs(c.fout, "(c);\n");
+ } else if n.tag == P_sp {
+ n = n.next;
+ continue;
+ } else {
+ fdputs(2, tag_to_str(n.tag));
+ die("invalid tag");
+ }
+
+ break;
+ }
+}
+
+translate(c: *peg, n: *peg_node) {
+ var v: *peg_node;
+
+ // Generate tags for each rule
+ fputs(c.fout, "enum {\n");
+ v = n.child;
+ loop {
+ if !v {
+ break;
+ }
+
+ if v.tag == P_rule {
+ fputs(c.fout, " P_");
+ fputb(c.fout, v.child.str, v.child.len);
+ fputs(c.fout, ",\n");
+ }
+
+ v = v.next;
+ }
+ fputs(c.fout, "}\n");
+
+ // Generate parsing functions for each rule
+ v = n.child;
+ loop {
+ if !v {
+ break;
+ }
+
+ if v.tag == P_rule {
+ fputs(c.fout, "\np_");
+ fputb(c.fout, v.child.str, v.child.len);
+ fputs(c.fout, "(c: *peg): int {\n");
+ fputs(c.fout, " var ok: int;\n");
+ fputs(c.fout, " enter(c);\n");
+ translate_pattern(c, v.child.next);
+ fputs(c.fout, " if ok { leave(c, P_");
+ fputb(c.fout, v.child.str, v.child.len);
+ fputs(c.fout, "); } else { fail(c); }\n");
+ fputs(c.fout, " return ok;\n");
+ fputs(c.fout, "}\n");
+ }
+
+ v = v.next;
}
- fdputc(1, ')');
}
main(argc: int, argv: **byte, envp: **byte) {
- var fd: int;
+ var ifd: int;
+ var ofd: int;
var a: alloc;
var c: peg;
+ var i: int;
var node: *peg_node;
setup_alloc(&a);
+ ifd = 0;
+ ofd = 1;
+
+ i = 1;
+ loop {
+ if i >= argc {
+ break;
+ }
+
+ if strcmp(argv[i], "-o") == 0 {
+ i = i + 1;
+ if i >= argc {
+ die("expected output file name");
+ }
+
+ unlink(argv[i]);
+
+ ofd = open(argv[i], O_CREAT | O_WRONLY, (6 << 6) + (6 << 3) + 6);
+ if ofd < 0 {
+ die("failed to open output");
+ }
+
+ i = i + 1;
+ continue;
+ }
+
+ if argv[i][0] == '-':byte {
+ die("usage: ./peg [-o grammar.c] <grammar.peg>");
+ }
+
+ if ifd != 0 {
+ die("too many inputs");
+ }
+
+ ifd = open(argv[i], 0, 0);
+ if ifd < 0 {
+ die("failed to open input");
+ }
+
+ i = i + 1;
+ }
+
c.a = &a;
c.pos = 0;
c.limit = 1024;
@@ -733,19 +1152,13 @@ main(argc: int, argv: **byte, envp: **byte) {
c.np = 0;
c.nstack = alloc(c.a, c.ncap * sizeof(c.nstack[0])):**peg_node;
- if argc != 2 {
- die("usage: ./peg <grammar.peg>");
- }
-
- fd = open(argv[1], 0, 0);
- if fd < 0 {
- die("failed to open grammar");
- }
-
- c.f = fopen(fd, c.a);
+ c.scratch = alloc(c.a, 256);
+ c.f = fopen(ifd, c.a);
c.src = freadall(c.f, &c.size);
+ c.fout = fopen(ofd, c.a);
+
choice(&c);
if !p_grammar(&c) {
die("Syntax error");
@@ -753,5 +1166,9 @@ main(argc: int, argv: **byte, envp: **byte) {
commit(&c);
node = construct(&c);
- show(node);
+
+ translate(&c, node);
+
+ fflush(c.fout);
+ fclose(c.fout);
}
diff --git a/syscall.c b/syscall.c
@@ -69,6 +69,10 @@ mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int {
return syscall(9, addr, len, prot, flags, fd, off);
}
+munmap(addr: int, len: int): int {
+ return syscall(11, addr, len, 0, 0, 0, 0);
+}
+
struct sigaction {
handler: int;
flags: int;