os

An operating system
git clone https://erai.gay/code/os/
Log | Files | Refs | README | LICENSE

commit e98815b06bc941a0b653656b6176c614e78fca0d
parent ea6a9e8b6d4947c067ce6355ec2893b386380025
Author: erai <erai@omiltem.net>
Date:   Thu, 12 Sep 2024 12:47:50 -0400

Add peg parser recognizer

Diffstat:
M bufio.c | 7 +++++++
A cc3.peg | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A peg.c | 524 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A peg.peg | 15 +++++++++++++++
M syscall.c | 4 ++++
5 files changed, 670 insertions(+), 0 deletions(-)

diff --git a/bufio.c b/bufio.c @@ -153,3 +153,10 @@ fputs(f: *file, s: *byte) { i = i + 1; } } + +fseek(f: *file, off: int) { + f.r = 0; + f.w = 0; + f.eof = 0; + lseek(f.fd, off, 0); +} diff --git a/cc3.peg b/cc3.peg @@ -0,0 +1,120 @@ +grammar <- sp (enum_decl / struct_decl / func_decl)* !. + +enum_decl <- enum ident '{' sp (ident (',' ident)*)? (',' sp)? '}' sp + +struct_decl <- struct ident '{' sp (ident ':' sp type ';' sp)* '}' sp + +func_decl <- ident func_type (';' / '{' sp stmt* '}' ) sp + +type <- ident + / byte + / int + / void + / '*' sp type + / '(' sp type ')' sp + / func func_type + +func_type <- '(' sp + ( ident ':' sp type (',' sp ident ':' sp type)* )? + ( ',' sp )? + ')' sp (':' sp type)? + +stmt <- if_stmt + / loop_stmt + / break_stmt + / continue_stmt + / return_stmt + / var_stmt + / label_stmt + / goto_stmt + / assign_stmt + / expr_stmt + / empty_stmt + +if_stmt <- if expr '{' sp stmt* '}' sp + (else if expr '{' sp stmt* '}' sp)* + (else '{' sp stmt '}' sp)? + +loop_stmt <- 'loop' sp '{' sp stmt* '}' sp + +break_stmt <- 'break' sp ';' sp + +continue_stmt <- 'continue' sp ';' sp + +return_stmt <- return expr? sp ';' sp + +var_stmt <- var ident ':' sp type ';' sp + +label_stmt <- ':' sp ident ';' sp + +goto_stmt <- goto ident ';' sp + +assign_stmt <- unary_expr '=' sp expr ';' sp + +expr_stmt <- expr ';' sp + +empty_stmt <- ';' sp + +bool_expr <- comp_expr (('&&' / '||') sp comp_expr)* + +comp_expr <- add_expr (('<=' / '>=' / '<' / '>' / '==' / '!=') sp add_expr)? + +add_expr <- mul_expr (('+' / '-' / '|' / '^') sp add_expr)* + +mul_expr <- shift_expr (('*' / '/' / '%' / '&') sp mul_expr)* + +shift_expr <- unary_expr (('<<' / '>>') sp shift_expr)* + +unary_expr <- (('&' / '*' / '+' / '-' / '~' / '!') sp)* post_expr + +post_expr <- primary ( '[' sp expr ']' sp + / '(' sp ( expr (',' sp expr)* )? (',' sp)? ')' sp + / '.' 
sp ident + / ':' sp type )* + +primary <- ident + / literal + / '(' sp expr ')' sp + / sizeof '(' sp expr ')' sp + +literal <- '0x'[0-9a-fA-F]+ sp + / [0-9]+ sp + / ["] ([\\] . / .)* ["] sp + / ['] ([\\] . / .) ['] sp + +reserved <- return + / break + / sizeof + / if + / else + / loop + / continue + / goto + / var + / enum + / struct + / byte + / int + / void + +return <- 'return' tc +break <- 'break' tc +sizeof <- 'sizeof' tc +if <- 'if' tc +else <- 'else' tc +loop <- 'loop' tc +continue <- 'continue' tc +goto <- 'goto' tc +var <- 'var' tc +enum <- 'enum' tc +struct <- 'struct' tc +byte <- 'byte' tc +int <- 'int' tc +void <- 'void' tc + +ident <- !reserved [a-zA-Z_][a-zA-Z0-9_]* sp + +tc <- ![a-zA-Z0-9_] sp + +sp <- ( [ \r\n\t] + / '//' (![\r\n] .)* )* diff --git a/peg.c b/peg.c @@ -0,0 +1,524 @@ +struct compiler { + a: *alloc; + f: *file; + pos: int; + stack: *int; + sp: int; + limit: int; +} + +enum { + FAIL = 0, + OK = 1, +} + +choice(c: *compiler) { + if c.sp == c.limit { + die("backtrack overflow"); + } + c.stack[c.sp] = c.pos; + c.sp = c.sp + 1; +} + +commit(c: *compiler) { + if c.sp == 0 { + die("backtrack underflow"); + } + c.sp = c.sp - 1; +} + +fail(c: *compiler) { + if c.sp == 0 { + die("backtrack underflow"); + } + c.sp = c.sp - 1; + c.pos = c.stack[c.sp]; + fseek(c.f, c.pos); +} + +get(c: *compiler): int { + var ch: int; + + ch = fgetc(c.f); + if ch != -1 { + c.pos = c.pos + 1; + } + + return ch; +} + +literal(c: *compiler, s: *byte): int { + var i: int; + var ch: int; + + i = 0; + loop { + if !s[i] { + break; + } + + ch = get(c); + if ch != (s[i]:int) { + fail(c); + return FAIL; + } + + i = i + 1; + } + + return OK; +} + +charclass(c: *compiler, s: *byte): int { + var i: int; + var ch: int; + + ch = get(c); + + i = 0; + loop { + if !s[i] { + break; + } + + if ch == (s[i]:int) { + return OK; + } + + i = i + 1; + } + + fail(c); + return FAIL; +} + +any(c: *compiler): int { + var ch: int; + ch = get(c); + if ch == -1 { + fail(c); + return FAIL; + } + 
return OK; +} + +// grammar <- sp rule+ !. +p_grammar(c: *compiler): int { + if !p_sp(c) { + return FAIL; + } + + if !p_rule(c) { + return FAIL; + } + + loop { + choice(c); + if !p_rule(c) { + break; + } + commit(c); + } + + choice(c); + if !any(c) { + return OK; + } + fail(c); + fail(c); + + return FAIL; +} + +// rule <- ident '<-' sp pattern +p_rule(c: *compiler): int { + if !p_ident(c) { + return FAIL; + } + + if !literal(c, "<-") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + if !p_pattern(c) { + return FAIL; + } + + return OK; +} + +// pattern <- alt ( '/' sp alt )* +p_pattern(c: *compiler): int { + if !p_alt(c) { + return FAIL; + } + + loop { + choice(c); + + if !literal(c, "/") { + break; + } + + if !p_sp(c) { + break; + } + + if !p_alt(c) { + break; + } + + commit(c); + } + + return OK; +} + +// preop <- [!&] sp +p_preop(c: *compiler): int { + if !charclass(c, "!&") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// alt <- ( preop? suffix )+ +p_alt(c: *compiler): int { + choice(c); + if p_preop(c) { + commit(c); + } + + if !p_suffix(c) { + return FAIL; + } + + loop { + choice(c); + + choice(c); + if p_preop(c) { + commit(c); + } + + + if !p_suffix(c) { + break; + } + + commit(c); + } + + return OK; +} + +// postop <- [*+?] 
sp +p_postop(c: *compiler): int { + if !charclass(c, "*+?") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// suffix <- primary postop* +p_suffix(c: *compiler): int { + if !p_primary(c) { + return FAIL; + } + + loop { + choice(c); + + if !p_postop(c) { + break; + } + + commit(c); + } + + return OK; +} + +// primary <- group / any / literal / charclass / nonterminal +p_primary(c: *compiler): int { + choice(c); + if p_group(c) { + commit(c); + return OK; + } + + choice(c); + if p_any(c) { + commit(c); + return OK; + } + + choice(c); + if p_literal(c) { + commit(c); + return OK; + } + + choice(c); + if p_charclass(c) { + commit(c); + return OK; + } + + choice(c); + if p_nonterminal(c) { + commit(c); + return OK; + } + + fail(c); + return FAIL; +} + +// group <- '(' sp pattern ')' sp +p_group(c: *compiler): int { + if !literal(c, "(") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + if !p_pattern(c) { + return FAIL; + } + + if !literal(c, ")") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// any <- '.' sp +p_any(c: *compiler): int { + if !literal(c, ".") { + return FAIL; + } + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// literal <- ['] ( !['] . )* ['] sp +p_literal(c: *compiler): int { + choice(c); + if !literal(c, "'") { + return FAIL; + } + commit(c); + + loop { + choice(c); + + choice(c); + if literal(c, "'") { + fail(c); + fail(c); + break; + } + + if !any(c) { + break; + } + + commit(c); + } + + choice(c); + if !literal(c, "'") { + return FAIL; + } + commit(c); + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// charclass <- '[' ( !']' ( . '-' . / . 
) )* ']' sp +p_charclass(c: *compiler): int { + choice(c); + if !literal(c, "[") { + return FAIL; + } + commit(c); + + loop { + choice(c); + + choice(c); + if literal(c, "]") { + fail(c); + fail(c); + break; + } + + if !any(c) { + break; + } + + choice(c); + if literal(c, "-") { + if any(c) { + commit(c); + } + } + + commit(c); + } + + choice(c); + if !literal(c, "]") { + return FAIL; + } + commit(c); + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// nonterminal <- ident !'<-' +p_nonterminal(c: *compiler): int { + if !p_ident(c) { + return FAIL; + } + + choice(c); + if !literal(c, "<-") { + return OK; + } + fail(c); + fail(c); + return FAIL; +} + +// ident <- [a-zA-Z]+ sp +p_ident(c: *compiler): int { + var chars: *byte; + + chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + if !charclass(c, chars) { + return FAIL; + } + + loop { + choice(c); + + if !charclass(c, chars) { + break; + } + + commit(c); + } + + if !p_sp(c) { + return FAIL; + } + + return OK; +} + +// sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )* +p_sp(c: *compiler): int { + loop { + choice(c); + + choice(c); + if charclass(c, " \t\r\n") { + commit(c); + commit(c); + continue; + } + + choice(c); + if literal(c, "#") { + commit(c); + + loop { + choice(c); + + choice(c); + if charclass(c, "\r\n") { + fail(c); + fail(c); + break; + } + + if !any(c) { + break; + } + + commit(c); + } + + commit(c); + continue; + } + + break; + } + + return OK; +} + +main(argc: int, argv: **byte, envp: **byte) { + var fd: int; + var a: alloc; + var c: compiler; + setup_alloc(&a); + + c.a = &a; + c.pos = 0; + c.limit = 1024; + c.stack = alloc(c.a, c.limit * sizeof(c.stack[0])):*int; + + if argc != 2 { + die("usage: ./peg <grammar.peg>"); + } + + fd = open(argv[1], 0, 0); + if fd < 0 { + die("failed to open grammar"); + } + + c.f = fopen(fd, c.a); + + if !p_grammar(&c) { + die("FAIL"); + } + fdputs(1, "OK\n"); +} diff --git a/peg.peg b/peg.peg @@ -0,0 +1,15 @@ +grammar <- sp rule+ !. 
+rule <- ident '<-' sp pattern +pattern <- alt ( '/' sp alt )* +preop <- [!&] sp +alt <- ( preop? suffix )+ +postop <- [*+?] sp +suffix <- primary postop* +primary <- group / any / literal / charclass / nonterminal +group <- '(' sp pattern ')' sp +any <- '.' sp +literal <- ['] ( !['] . )* ['] sp +charclass <- '[' ( !']' ( . '-' . / . ) )* ']' sp +nonterminal <- ident !'<-' +ident <- [a-zA-Z]+ sp +sp <- ( [ \t\r\n] / '#' ( ![\r\n] . )* )* diff --git a/syscall.c b/syscall.c @@ -61,6 +61,10 @@ poll(pfd: *int, nfd: int, timeout: int): int { return syscall(7, pfd:int, nfd, timeout, 0, 0, 0); } +lseek(fd: int, off: int, whence: int): int { + return syscall(8, fd, off, whence, 0, 0, 0); +} + mmap(addr: int, len: int, prot: int, flags: int, fd: int, off: int): int { return syscall(9, addr, len, prot, flags, fd, off); }