os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit 926ecdedf8184453554bc0a2ae8a4588d25c0f00
parent 36d43fdc15a7021e3d8dfd67f098cffdc92c357f
Author: erai <erai@omiltem.net>
Date:   Sat, 14 Sep 2024 12:10:36 -0400

Translate peg to code

Diffstat:
Mbufio.c | 12++++++++++++
Mpeg.c | 473++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Msyscall.c | 4++++
3 files changed, 461 insertions(+), 28 deletions(-)

diff --git a/bufio.c b/bufio.c @@ -154,6 +154,18 @@ fputs(f: *file, s: *byte) { } } +fputb(f: *file, s: *byte, n: int) { + var i: int; + i = 0; + loop { + if i >= n { + break; + } + fputc(f, s[i]:int); + i = i + 1; + } +} + fseek(f: *file, off: int) { f.r = 0; f.w = 0; diff --git a/peg.c b/peg.c @@ -14,6 +14,7 @@ struct peg_op { struct peg { a: *alloc; f: *file; + fout: *file; src: *byte; size: int; pos: int; @@ -27,6 +28,7 @@ struct peg { nstack: **peg_node; np: int; ncap: int; + scratch: *byte; } enum { @@ -693,37 +695,454 @@ construct(c: *peg): *peg_node { } } -show(n: *peg_node) { - fdputc(1, '('); - fdputs(1, tag_to_str(n.tag)); - if n.child { - n = n.child; - loop { - if !n { - break; +enum { + LOOK_NORMAL, + LOOK_NOT, + LOOK_AND, +} + +decode_look(n: *peg_node): int { + var ret: int; + + ret = LOOK_NORMAL; + if n.child.tag == P_lookop { + if n.child.str[0] == '!':byte { + ret = LOOK_NOT; + } else if n.child.str[0] == '&':byte { + ret = LOOK_AND; + } + } + + return ret; +} + +enum { + ZERO_OR_ONE, + EXACTLY_ONE, + ZERO_OR_MORE, + ONE_OR_MORE, +} + +decode_count(n: *peg_node): int { + var ret: int; + + ret = EXACTLY_ONE; + n = n.child; + loop { + if !n { + return ret; + } + + if n.tag == P_countop { + if n.str[0] == '?':byte { + if ret == EXACTLY_ONE { + ret = ZERO_OR_ONE; + } else if ret == ONE_OR_MORE { + ret = ZERO_OR_MORE; + } + } else if n.str[0] == '*':byte { + ret = ZERO_OR_MORE; + } else if n.str[0] == '+':byte { + if ret == ZERO_OR_ONE { + ret = ZERO_OR_MORE; + } else if ret == EXACTLY_ONE { + ret = ONE_OR_MORE; + } else if ret == ZERO_OR_MORE { + ret = ZERO_OR_MORE; + } + } else { + die("invalid countop"); } + } - fdputc(1, ' '); - show(n); + n = n.next; + } +} - n = n.next; +translate_literal(c: *peg, n: *peg_node) { + var i: int; + var len: int; + var ch: int; + var hex: *byte; + + hex = "0123456789abcdef"; + + i = 1; + len = n.len - 1; + loop { + if i == len { + break; + } + + ch = n.str[i]:int; + + if ch < 32 || ch > 127 { + fputc(c.fout, '\\'); + fputc(c.fout, 'x'); + fputc(c.fout, hex[ch >> 4]:int); + fputc(c.fout, hex[ch & 15]:int); + } else { + fputc(c.fout, ch); + } + + i = i + 1; + } +} + +hexdig(c: byte): int { + var ch: int; + + ch = c:int; + + if ch >= '0' && ch <= '9' { + return ch - '0'; + } + + if ch >= 'a' && ch <= 'f' { + return ch - 'a' + 10; + } + + if ch >= 'A' && ch <= 'F' { + return ch - 'A' + 10; + } + + die("invalid hex digit"); +} + +parse_escape(s: *byte, i: *int, n: int): int { + var nc: int; + + if *i == n { + die("invalid escape"); + } + + nc = s[*i]:int; + *i = *i + 1; + + if nc == 't' { + return '\t'; + } else if nc == 'r' { + return '\r'; + } else if nc == 'n' { + return '\n'; + } else if nc == '\\' { + return '\\'; + } else if nc == '\'' { + return '\''; + } else if nc == '"' { + return '"'; + } else if nc == '-' { + return '-'; + } else if nc == '[' { + return '['; + } else if nc == ']' { + return ']'; + } else if nc == 'x' { + if n - *i < 2 { + die("invalid escape"); } + nc = hexdig(s[*i]) * 16 + hexdig(s[*i + 1]); + *i = *i + 2; + return nc; } else { - fdputc(1, ' '); - fdputc(1, '"'); - write(1, n.str, n.len); - fdputc(1, '"'); + die("invalid escape"); + } +} + +translate_charset(c: *peg, n: *peg_node) { + var i: int; + var len: int; + var ch: int; + var a: int; + var b: int; + var hex: *byte; + + hex = "0123456789abcdef"; + + memset(c.scratch, 0, 256); + + i = 1; + len = n.len - 1; + loop { + if i == len { + break; + } + + ch = n.str[i]:int; + i = i + 1; + + if ch == '\\' { + ch = parse_escape(n.str, &i, len); + } + + if i < len && n.str[i] == '-':byte { + i = i + 1; + + if i == len { + die("invalid range"); + } + + a = ch; + + ch = n.str[i]:int; + i = i + 1; + + if ch == '\\' { + ch = parse_escape(n.str, &i, len); + } + + b = ch; + + loop { + if a > b { + break; + } + + c.scratch[a] = 1:byte; + + a = a + 1; + } + } else { + c.scratch[ch] = 1:byte; + } + } + + i = 1; + loop { + if i == 256 { + break; + } + + if c.scratch[i] { + if i < 32 || i > 127 { + fputc(c.fout, '\\'); + fputc(c.fout, 'x'); + fputc(c.fout, hex[i >> 4]:int); + fputc(c.fout, hex[i & 15]:int); + } else { + fputc(c.fout, i); + } + } + + i = i + 1; + } +} + +translate_pattern(c: *peg, n: *peg_node) { + var count: int; + var look: int; + var d: *peg_node; + + loop { + if n.tag == P_pattern { + d = n.child; + if !d.next { + translate_pattern(c, d); + } else { + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, d); + d = d.next; + loop { + if !d { + break; + } + + fputs(c.fout, " if !ok { choice(c);\n"); + translate_pattern(c, d); + fputs(c.fout, " }\n"); + + d = d.next; + } + fputs(c.fout, " if ok { commit(c); } else { fail(c); }\n"); + } + } else if n.tag == P_alternative { + d = n.child; + translate_pattern(c, d); + d = d.next; + loop { + if !d { + break; + } + + fputs(c.fout, " if ok {\n"); + translate_pattern(c, d); + fputs(c.fout, " }\n"); + + d = d.next; + } + } else if n.tag == P_lookahead { + look = decode_look(n); + d = n.child; + if d.tag == P_lookop { + d = d.next; + } + + if look == LOOK_AND { + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, d); + fputs(c.fout, " fail(c);\n"); + } else if look == LOOK_NOT { + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, d); + fputs(c.fout, " if ok { fail(c); fail(c); ok = 0; } else { ok = 1; }\n"); + } else if look == LOOK_NORMAL { + translate_pattern(c, d); + } else { + die("invalid lookop"); + } + } else if n.tag == P_suffix { + count = decode_count(n); + if count == ZERO_OR_ONE { + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, n.child); + fputs(c.fout, " if ok { commit(c); } else { ok = 1; }\n"); + } else if count == EXACTLY_ONE { + translate_pattern(c, n.child); + } else if count == ZERO_OR_MORE { + fputs(c.fout, " loop {\n"); + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, n.child); + fputs(c.fout, " if !ok { break }\n"); + fputs(c.fout, " commit(c);\n"); + fputs(c.fout, " }\n"); + } else if count == ONE_OR_MORE { + translate_pattern(c, n.child); + fputs(c.fout, " if ok {\n"); + fputs(c.fout, " loop {\n"); + fputs(c.fout, " choice(c);\n"); + translate_pattern(c, n.child); + fputs(c.fout, " if !ok { break }\n"); + fputs(c.fout, " commit(c);\n"); + fputs(c.fout, " }\n"); + fputs(c.fout, " }\n"); + } else { + die("invalid countop"); + } + } else if n.tag == P_primary { + translate_pattern(c, n.child); + } else if n.tag == P_any { + fputs(c.fout, " ok = any(c);\n"); + } else if n.tag == P_literal { + fputs(c.fout, " ok = literal(c, \""); + translate_literal(c, n); + fputs(c.fout, "\");\n"); + } else if n.tag == P_class { + fputs(c.fout, " ok = charset(c, \""); + translate_charset(c, n); + fputs(c.fout, "\");\n"); + } else if n.tag == P_call { + fputs(c.fout, " ok = p_"); + fputb(c.fout, n.child.str, n.child.len); + fputs(c.fout, "(c);\n"); + } else if n.tag == P_sp { + n = n.next; + continue; + } else { + fdputs(2, tag_to_str(n.tag)); + die("invalid tag"); + } + + break; + } +} + +translate(c: *peg, n: *peg_node) { + var v: *peg_node; + + // Generate tags for each rule + fputs(c.fout, "enum {\n"); + v = n.child; + loop { + if !v { + break; + } + + if v.tag == P_rule { + fputs(c.fout, " P_"); + fputb(c.fout, v.child.str, v.child.len); + fputs(c.fout, ",\n"); + } + + v = v.next; + } + fputs(c.fout, "}\n"); + + // Generate parsing functions for each rule + v = n.child; + loop { + if !v { + break; + } + + if v.tag == P_rule { + fputs(c.fout, "\np_"); + fputb(c.fout, v.child.str, v.child.len); + fputs(c.fout, "(c: *peg): int {\n"); + fputs(c.fout, " var ok: int;\n"); + fputs(c.fout, " enter(c);\n"); + translate_pattern(c, v.child.next); + fputs(c.fout, " if ok { leave(c, P_"); + fputb(c.fout, v.child.str, v.child.len); + fputs(c.fout, "); } else { fail(c); }\n"); + fputs(c.fout, " return ok;\n"); + fputs(c.fout, "}\n"); + } + + v = v.next; } - fdputc(1, ')'); } main(argc: int, argv: **byte, envp: **byte) { - var fd: int; + var ifd: int; + var ofd: int; var a: alloc; var c: peg; + var i: int; var node: *peg_node; setup_alloc(&a); + ifd = 0; + ofd = 1; + + i = 1; + loop { + if i >= argc { + break; + } + + if strcmp(argv[i], "-o") == 0 { + i = i + 1; + if i >= argc { + die("expected output file name"); + } + + unlink(argv[i]); + + ofd = open(argv[i], O_CREAT | O_WRONLY, (6 << 6) + (6 << 3) + 6); + if ofd < 0 { + die("failed to open output"); + } + + i = i + 1; + continue; + } + + if argv[i][0] == '-':byte { + die("usage: ./peg [-o grammar.c] <grammar.peg>"); + } + + if ifd != 0 { + die("too many inputs"); + } + + ifd = open(argv[i], 0, 0); + if ifd < 0 { + die("failed to open input"); + } + + i = i + 1; + } + c.a = &a; c.pos = 0; c.limit = 1024; @@ -733,19 +1152,13 @@ main(argc: int, argv: **byte, envp: **byte) { c.np = 0; c.nstack = alloc(c.a, c.ncap * sizeof(c.nstack[0])):**peg_node; - if argc != 2 { - die("usage: ./peg <grammar.peg>"); - } - - fd = open(argv[1], 0, 0); - if fd < 0 { - die("failed to open grammar"); - } - - c.f = fopen(fd, c.a); + c.scratch = alloc(c.a, 256); + c.f = fopen(ifd, c.a); c.src = freadall(c.f, &c.size); + c.fout = fopen(ofd, c.a); + choice(&c); if !p_grammar(&c) { die("Syntax error"); @@ -753,5 +1166,9 @@ main(argc: int, argv: **byte, envp: **byte) { commit(&c); node = construct(&c); - show(node); + + translate(&c, node); + + fflush(c.fout); + fclose(c.fout); } diff --git a/syscall.c b/syscall.c @@ -69,6 +69,10 @@ mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int { return syscall(9, addr, len, prot, flags, fd, off); } +munmap(addr: int, len: int): int { + return syscall(11, addr, len, 0, 0, 0, 0); +} + struct sigaction { handler: int; flags: int;