commit deeffcab331bb7e1e2fd0d18c92fe3662fc39e69
parent 7dbbe8ac0fb1c48750e5766fe27de673613347c8
Author: erai <erai@omiltem.net>
Date: Wed, 9 Apr 2025 20:49:39 -0400
move lexlib into lexer
Diffstat:
M | bootstrap.sh | | | 2 | +- |
M | cc0.c | | | 181 | +++++++++++++++++++++++++++++++++++++++++-------------------------------------- |
M | cc4.om | | | 180 | ++++++++++++++++++++++++++++++++++++++++---------------------------------------- |
D | dump.om | | | 31 | ------------------------------- |
M | lexer.om | | | 200 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | lexlib.om | | | 197 | ------------------------------------------------------------------------------- |
6 files changed, 385 insertions(+), 406 deletions(-)
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -2,7 +2,7 @@
BOOTSTRAP="cc0.c"
LIBS="bufio.om lib.om alloc.om syscall.om"
-SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om lexlib.om"
+SOURCES="cc1.om type.om parse2.om peglib.om as.om decl.om node.om peg.om ir.om ircout.om rb.om table.om lexer.om lalr.om cc4.om"
# Build the bootstrap compiler from c
[ cc0 -nt cc0.c ] || ${CC:-gcc} -O1 -g ${BOOTSTRAP} -o cc0
diff --git a/cc0.c b/cc0.c
@@ -4659,24 +4659,20 @@ b8: v20 = (u)zgettok;
v22 = ((u(*)())v20)(v21);
vtok = v22;
if (vtok != -1UL) goto b13;
- if (!*(u*)(vl + 40UL)) goto b29;
+ if (!*(u*)(vl + 48UL)) goto b23;
v43 = 0UL;
-b30: if (!v43) goto b27;
+b24: if (!v43) goto b21;
v44 = (u)zdie;
v45 = (u)"problem";
v46 = ((u(*)())v44)(v45);
v46;
-b25: return 0UL;
-b27: goto b25;
-b29: v43 = 1UL;
- goto b30;
-b13: if (vtok != 59UL) goto b16;
- goto b8;
-b16: if (vtok != 60UL) goto b19;
- goto b8;
-b19: v23 = (u)zfputs;
+b19: return 0UL;
+b21: goto b19;
+b23: v43 = 1UL;
+ goto b24;
+b13: v23 = (u)zfputs;
v24 = 0UL;
- v26 = (u)zget_tag_name;
+ v26 = *(u*)(vl + 32UL);
v27 = vtok;
v28 = ((u(*)())v26)(v27);
v25 = v28;
@@ -4689,8 +4685,8 @@ b19: v23 = (u)zfputs;
v33;
v34 = (u)zfputb;
v35 = 0UL;
- v36 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL;
- v37 = *(u*)(vl + 72UL) - *(u*)(vl + 64UL);
+ v36 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL;
+ v37 = *(u*)(vl + 80UL) - *(u*)(vl + 72UL);
v38 = ((u(*)())v34)(v35, v36, v37);
v38;
v39 = (u)zfputs;
@@ -19990,93 +19986,103 @@ u zgettok(u vl) {
u v32 = 0;
u v33 = 0;
u v34 = 0;
- vptr = *(u*)(vl + 72UL);
- vlineno = *(u*)(vl + 120UL);
- vcolno = *(u*)(vl + 128UL);
- *(u*)(vl + 64UL) = vptr;
- *(u*)(vl + 104UL) = vlineno;
- *(u*)(vl + 112UL) = vcolno;
- *(u*)(vl + 96UL) = -1UL;
- *(u*)(vl + 120UL) = vlineno;
- *(u*)(vl + 128UL) = vcolno;
+ u v35 = 0;
+b1: vptr = *(u*)(vl + 80UL);
+ vlineno = *(u*)(vl + 128UL);
+ vcolno = *(u*)(vl + 136UL);
+ *(u*)(vl + 72UL) = vptr;
+ *(u*)(vl + 112UL) = vlineno;
+ *(u*)(vl + 120UL) = vcolno;
+ *(u*)(vl + 104UL) = -1UL;
+ *(u*)(vl + 128UL) = vlineno;
+ *(u*)(vl + 136UL) = vcolno;
vstate = 0UL;
-b1: if (vptr != *(u*)(vl + 80UL)) goto b5;
- if (!*(u*)(vl + 48UL)) goto b8;
- if (*(u*)(vl + 64UL) != *(u*)(vl + 80UL)) goto b11;
- *(u*)(vl + 40UL) = 1UL;
-b9:b2: return *(u*)(vl + 96UL);
-b11: goto b9;
-b8: if ((s)(*(u*)(vl + 80UL) - *(u*)(vl + 64UL)) < (s)(*(u*)(vl + 88UL) >> 1UL)) goto b14;
- *(u*)(vl + 88UL) = *(u*)(vl + 88UL) * 2UL + 256UL;
+b3: if (vptr != *(u*)(vl + 88UL)) goto b7;
+ if (!*(u*)(vl + 56UL)) goto b10;
+ if (*(u*)(vl + 72UL) != *(u*)(vl + 88UL)) goto b13;
+ *(u*)(vl + 48UL) = 1UL;
+b11:b4: if (*(u*)(vl + 104UL) != 59UL) goto b44;
+ v35 = 1UL;
+b46: if (!v35) goto b42;
+ goto b1;
+b42: return *(u*)(vl + 104UL);
+b44: if (*(u*)(vl + 104UL) != 60UL) goto b47;
+ v35 = 1UL;
+ goto b46;
+b47: v35 = 0UL;
+ goto b46;
+b13: goto b11;
+b10: if ((s)(*(u*)(vl + 88UL) - *(u*)(vl + 72UL)) < (s)(*(u*)(vl + 96UL) >> 1UL)) goto b16;
+ *(u*)(vl + 96UL) = *(u*)(vl + 96UL) * 2UL + 256UL;
v9 = (u)zalloc;
v10 = *(u*)(vl + 0UL);
- v11 = *(u*)(vl + 88UL);
+ v11 = *(u*)(vl + 96UL);
v12 = ((u(*)())v9)(v10, v11);
vtmp = v12;
v13 = (u)zmemcpy;
v14 = vtmp;
- v15 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL;
- v16 = *(u*)(vl + 80UL) - *(u*)(vl + 64UL);
+ v15 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL;
+ v16 = *(u*)(vl + 88UL) - *(u*)(vl + 72UL);
v17 = ((u(*)())v13)(v14, v15, v16);
v17;
v18 = (u)zfree;
v19 = *(u*)(vl + 0UL);
- v20 = *(u*)(vl + 56UL);
+ v20 = *(u*)(vl + 64UL);
v21 = ((u(*)())v18)(v19, v20);
v21;
- *(u*)(vl + 56UL) = vtmp;
- vptr = vptr - *(u*)(vl + 64UL);
- *(u*)(vl + 72UL) = *(u*)(vl + 72UL) - *(u*)(vl + 64UL);
- *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 64UL);
- *(u*)(vl + 64UL) = 0UL;
-b12: v27 = (u)zread;
- v28 = *(u*)(vl + 32UL);
- v29 = *(u*)(vl + 56UL) + *(u*)(vl + 80UL) * 1UL;
- v30 = *(u*)(vl + 88UL) - *(u*)(vl + 80UL);
+ *(u*)(vl + 64UL) = vtmp;
+ vptr = vptr - *(u*)(vl + 72UL);
+ *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 72UL);
+ *(u*)(vl + 88UL) = *(u*)(vl + 88UL) - *(u*)(vl + 72UL);
+ *(u*)(vl + 72UL) = 0UL;
+b14: v27 = (u)zread;
+ v28 = *(u*)(vl + 40UL);
+ v29 = *(u*)(vl + 64UL) + *(u*)(vl + 88UL) * 1UL;
+ v30 = *(u*)(vl + 96UL) - *(u*)(vl + 88UL);
v31 = ((u(*)())v27)(v28, v29, v30);
vret = v31;
- if ((s)vret >= (s)0UL) goto b24;
+ if ((s)vret >= (s)0UL) goto b26;
v32 = (u)zdie;
v33 = (u)"read failed";
v34 = ((u(*)())v32)(v33);
v34;
-b22: if (vret != 0UL) goto b28;
- *(u*)(vl + 48UL) = 1UL;
- goto b2;
-b28: *(u*)(vl + 80UL) = *(u*)(vl + 80UL) + vret;
-b3: vch = (u)*(b*)(*(u*)(vl + 56UL) + vptr * 1UL);
+b24: if (vret != 0UL) goto b30;
+ *(u*)(vl + 56UL) = 1UL;
+ goto b4;
+b30: *(u*)(vl + 88UL) = *(u*)(vl + 88UL) + vret;
+b5: vch = (u)*(b*)(*(u*)(vl + 64UL) + vptr * 1UL);
vptr = vptr + 1UL;
- if (vch != 10UL) goto b31;
+ if (vch != 10UL) goto b33;
vlineno = vlineno + 1UL;
vcolno = 1UL;
-b29: vstate = *(u*)(*(u*)(vl + 24UL) + (vstate * 256UL + vch) * 8UL);
- if (vstate != -1UL) goto b34;
- goto b2;
-b34: vtag = *(u*)(*(u*)(vl + 16UL) + vstate * 8UL);
- if (!vtag) goto b37;
- *(u*)(vl + 96UL) = vtag;
- *(u*)(vl + 72UL) = vptr;
- *(u*)(vl + 120UL) = vlineno;
- *(u*)(vl + 128UL) = vcolno;
-b35: goto b1;
-b37: goto b35;
-b31: vcolno = vcolno + 1UL;
- goto b29;
-b24: goto b22;
-b14: if ((s)*(u*)(vl + 80UL) < (s)(*(u*)(vl + 88UL) >> 1UL)) goto b19;
+b31: vstate = *(u*)(*(u*)(vl + 24UL) + (vstate * 256UL + vch) * 8UL);
+ if (vstate != -1UL) goto b36;
+ goto b4;
+b36: vtag = *(u*)(*(u*)(vl + 16UL) + vstate * 8UL);
+ if (!vtag) goto b39;
+ *(u*)(vl + 104UL) = vtag;
+ *(u*)(vl + 80UL) = vptr;
+ *(u*)(vl + 128UL) = vlineno;
+ *(u*)(vl + 136UL) = vcolno;
+b37: goto b3;
+b39: goto b37;
+b33: vcolno = vcolno + 1UL;
+ goto b31;
+b26: goto b24;
+b16: if ((s)*(u*)(vl + 88UL) < (s)(*(u*)(vl + 96UL) >> 1UL)) goto b21;
v22 = (u)zmemcpy;
- v23 = *(u*)(vl + 56UL);
- v24 = *(u*)(vl + 56UL) + *(u*)(vl + 64UL) * 1UL;
- v25 = *(u*)(vl + 80UL) - *(u*)(vl + 64UL);
+ v23 = *(u*)(vl + 64UL);
+ v24 = *(u*)(vl + 64UL) + *(u*)(vl + 72UL) * 1UL;
+ v25 = *(u*)(vl + 88UL) - *(u*)(vl + 72UL);
v26 = ((u(*)())v22)(v23, v24, v25);
v26;
- vptr = vptr - *(u*)(vl + 64UL);
- *(u*)(vl + 72UL) = *(u*)(vl + 72UL) - *(u*)(vl + 64UL);
- *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 64UL);
- *(u*)(vl + 64UL) = 0UL;
- goto b12;
-b19: goto b12;
-b5: goto b3;
+ vptr = vptr - *(u*)(vl + 72UL);
+ *(u*)(vl + 80UL) = *(u*)(vl + 80UL) - *(u*)(vl + 72UL);
+ *(u*)(vl + 88UL) = *(u*)(vl + 88UL) - *(u*)(vl + 72UL);
+ *(u*)(vl + 72UL) = 0UL;
+ goto b14;
+b21: goto b14;
+b7: goto b5;
}
u zhex2int(u vs, u vlen, u vok) {
u vi = 0;
@@ -29548,17 +29554,17 @@ b8: goto b6;
b3: goto b1;
}
u zopen_lex(u vl, u vfd) {
- *(u*)(vl + 32UL) = vfd;
- *(u*)(vl + 40UL) = 0UL;
+ *(u*)(vl + 40UL) = vfd;
*(u*)(vl + 48UL) = 0UL;
- *(u*)(vl + 64UL) = 0UL;
+ *(u*)(vl + 56UL) = 0UL;
*(u*)(vl + 72UL) = 0UL;
*(u*)(vl + 80UL) = 0UL;
- *(u*)(vl + 96UL) = -1UL;
- *(u*)(vl + 104UL) = 1UL;
+ *(u*)(vl + 88UL) = 0UL;
+ *(u*)(vl + 104UL) = -1UL;
*(u*)(vl + 112UL) = 1UL;
*(u*)(vl + 120UL) = 1UL;
*(u*)(vl + 128UL) = 1UL;
+ *(u*)(vl + 136UL) = 1UL;
return 0UL;
}
u zopen_lines_out(u vc, u vfilename) {
@@ -39695,7 +39701,7 @@ u zsetup_lex(u va) {
u v10 = 0;
v3 = (u)zalloc;
v4 = va;
- v5 = 136UL;
+ v5 = 144UL;
v6 = ((u(*)())v3)(v4, v5);
vl = v6;
*(u*)(vl + 0UL) = va;
@@ -39705,22 +39711,23 @@ u zsetup_lex(u va) {
v9 = (u)zget_link_table;
v10 = ((u(*)())v9)();
*(u*)(vl + 24UL) = v10;
+ *(u*)(vl + 32UL) = (u)zget_tag_name;
vi = 0UL;
b4: if (*(u*)(*(u*)(vl + 16UL) + vi * 8UL) != -1UL) goto b8;
*(u*)(vl + 8UL) = vi;
- *(u*)(vl + 32UL) = 0UL;
- *(u*)(vl + 40UL) = 1UL;
+ *(u*)(vl + 40UL) = 0UL;
*(u*)(vl + 48UL) = 1UL;
- *(u*)(vl + 56UL) = 0UL;
+ *(u*)(vl + 56UL) = 1UL;
*(u*)(vl + 64UL) = 0UL;
*(u*)(vl + 72UL) = 0UL;
*(u*)(vl + 80UL) = 0UL;
*(u*)(vl + 88UL) = 0UL;
- *(u*)(vl + 96UL) = -1UL;
- *(u*)(vl + 104UL) = 1UL;
+ *(u*)(vl + 96UL) = 0UL;
+ *(u*)(vl + 104UL) = -1UL;
*(u*)(vl + 112UL) = 1UL;
*(u*)(vl + 120UL) = 1UL;
*(u*)(vl + 128UL) = 1UL;
+ *(u*)(vl + 136UL) = 1UL;
return vl;
b8: vi = vi + 1UL;
goto b4;
diff --git a/cc4.om b/cc4.om
@@ -70,93 +70,93 @@ lexer {
COMMENT = "//" [[^\n]]* "\n";
}
-//lalr {
-// grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*;
-//
-// enum_item = IDENT (ASSIGN expr)?;
-// enum_decl = ENUM LEFT_BRACE (enum_item COMMA)* enum_item? RIGHT_BRACE;
-//
-// member_decl = IDENT COLON type_decl SEMI;
-// struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
-// union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
-//
-// func_decl = FUNC IDENT func_type (SEMI | compound_stmt);
-//
-// type_decl = IDENT | BYTE | INT | VOID
-// | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN;
-//
-// ptr_type = STAR type_decl;
-//
-// arg_decl = IDENT COLON type_decl;
-// func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN
-// (COLON type_decl)?;
-//
-// stmt = if_stmt | loop_stmt | break_stmt | continue_stmt
-// | return_stmt | var_stmt | label_stmt | goto_stmt
-// | assign_stmt | expr_stmt | empty_stmt | compound_stmt;
-//
-// elif_stmt = ELSE IF expr compound_stmt;
-// else_stmt = ELSE compound_stmt;
-// if_stmt = IF expr compound_stmt elif_stmt* else_stmt?;
-//
-// loop_stmt = LOOP compound_stmt;
-//
-// break_stmt = BREAK SEMI;
-//
-// continue_stmt = CONTINUE SEMI;
-//
-// return_stmt = RETURN expr? SEMI;
-//
-// var_stmt = VAR IDENT COLON type_decl SEMI;
-//
-// label_stmt = IDENT COLON;
-//
-// goto_stmt = GOTO IDENT SEMI;
-//
-// assign_stmt = unary_expr ASSIGN expr SEMI;
-//
-// expr_stmt = expr SEMI;
-//
-// empty_stmt = SEMI;
-//
-// compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE;
-//
-// expr = bool_expr;
-//
-// bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*;
-//
-// comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?;
-//
-// add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*;
-//
-// mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*;
-//
-// shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*;
-//
-// unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr;
-//
-// index_expr = LEFT_SQUARE expr RIGHT_SQUARE;
-// call_expr = LEFT_PAREN (expr COMMA)* expr? RIGHT_PAREN;
-// member_expr = DOT IDENT;
-// cast_expr = AS type_decl;
-// post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*;
-//
-// primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR
-// | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN;
-//
-// sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN;
-//
-// lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET;
-// lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*;
-// lexer_alternative = lexer_suffix*;
-// lexer_pattern = lexer_alternative (PIPE lexer_alternative)*;
-// lexer_rule = IDENT ASSIGN lexer_pattern SEMI;
-// lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE;
-//
-// lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT;
-// lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*;
-// lalr_alternative = lalr_suffix*;
-// lalr_pattern = lalr_alternative (PIPE lalr_alternative)*;
-// lalr_rule = IDENT ASSIGN lalr_pattern SEMI;
-// lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE;
-//}
+lalr { // Grammar rules written over the token names (IDENT, SEMI, ...); each rule is `name = pattern;`.
+ grammar = (enum_decl | struct_decl | union_decl | func_decl | lexer_grammar | lalr_grammar)*; // Top level: any sequence of declarations and grammar blocks.
+
+ enum_item = IDENT (ASSIGN expr)?;
+ enum_decl = ENUM LEFT_BRACE (enum_item COMMA)* enum_item? RIGHT_BRACE; // Trailing comma after the last item is optional.
+
+ member_decl = IDENT COLON type_decl SEMI;
+ struct_decl = STRUCT IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
+ union_decl = UNION IDENT LEFT_BRACE member_decl* RIGHT_BRACE;
+
+ func_decl = FUNC IDENT func_type (SEMI | compound_stmt); // SEMI form is a declaration without a body.
+
+ type_decl = IDENT | BYTE | INT | VOID
+ | FUNC func_type | ptr_type | LEFT_PAREN type_decl RIGHT_PAREN;
+
+ ptr_type = STAR type_decl;
+
+ arg_decl = IDENT COLON type_decl;
+ func_type = LEFT_PAREN (arg_decl COMMA)* arg_decl? RIGHT_PAREN
+ (COLON type_decl)?;
+
+ stmt = if_stmt | loop_stmt | break_stmt | continue_stmt
+ | return_stmt | var_stmt | label_stmt | goto_stmt
+ | assign_stmt | expr_stmt | empty_stmt | compound_stmt;
+
+ elif_stmt = ELSE IF expr compound_stmt;
+ else_stmt = ELSE compound_stmt;
+ if_stmt = IF expr compound_stmt elif_stmt* else_stmt?;
+
+ loop_stmt = LOOP compound_stmt;
+
+ break_stmt = BREAK SEMI;
+
+ continue_stmt = CONTINUE SEMI;
+
+ return_stmt = RETURN expr? SEMI;
+
+ var_stmt = VAR IDENT COLON type_decl SEMI;
+
+ label_stmt = IDENT COLON;
+
+ goto_stmt = GOTO IDENT SEMI;
+
+ assign_stmt = unary_expr ASSIGN expr SEMI;
+
+ expr_stmt = expr SEMI;
+
+ empty_stmt = SEMI;
+
+ compound_stmt = LEFT_BRACE stmt* RIGHT_BRACE;
+
+ expr = bool_expr; // Expression rules below nest from loosest (bool_expr) to tightest (post_expr).
+
+ bool_expr = comp_expr ((AND_THEN|OR_ELSE) comp_expr)*;
+
+ comp_expr = add_expr ((LE|GE|LT|GT|EQ|NE) add_expr)?; // `?` rather than `*`: at most one comparison per comp_expr.
+
+ add_expr = mul_expr ((PLUS|MINUS|PIPE|XOR) mul_expr)*;
+
+ mul_expr = shift_expr ((STAR|SLASH|MOD|AMP) shift_expr)*;
+
+ shift_expr = unary_expr ((LEFT_SHIFT|RIGHT_SHIFT) unary_expr)*; // Shifts sit below mul_expr, i.e. they bind tighter than STAR/SLASH here.
+
+ unary_expr = (AMP|STAR|PLUS|MINUS|NOT|BANG)* post_expr;
+
+ index_expr = LEFT_SQUARE expr RIGHT_SQUARE;
+ call_expr = LEFT_PAREN (expr COMMA)* expr? RIGHT_PAREN;
+ member_expr = DOT IDENT;
+ cast_expr = AS type_decl;
+ post_expr = primary (index_expr | call_expr | member_expr | cast_expr)*;
+
+ primary = IDENT | HEXIDECIMAL | DECIMAL | STRING | CHAR
+ | sizeof_expr | NIL | LEFT_PAREN expr RIGHT_PAREN;
+
+ sizeof_expr = SIZEOF LEFT_PAREN expr RIGHT_PAREN;
+
+ lexer_primary = LEFT_PAREN lexer_pattern RIGHT_PAREN | DOT | STRING | CHARSET;
+ lexer_suffix = lexer_primary (STAR|PLUS|QMARK)*;
+ lexer_alternative = lexer_suffix*;
+ lexer_pattern = lexer_alternative (PIPE lexer_alternative)*;
+ lexer_rule = IDENT ASSIGN lexer_pattern SEMI;
+ lexer_grammar = LEX LEFT_BRACE lexer_rule+ RIGHT_BRACE;
+
+ lalr_primary = LEFT_PAREN lalr_pattern RIGHT_PAREN | IDENT;
+ lalr_suffix = lalr_primary (STAR|PLUS|QMARK)*;
+ lalr_alternative = lalr_suffix*;
+ lalr_pattern = lalr_alternative (PIPE lalr_alternative)*;
+ lalr_rule = IDENT ASSIGN lalr_pattern SEMI;
+ lalr_grammar = LALR LEFT_BRACE lalr_rule+ RIGHT_BRACE;
+}
diff --git a/dump.om b/dump.om
@@ -1,31 +0,0 @@
-func main(argc: int, argv: **byte, envp: **byte) {
- var fd: int;
- var f: *file;
- var out: *file;
- var peg: *peg;
- var a: alloc;
- var len: int;
- var src: *byte;
- var node: *peg_node;
- setup_alloc(&a);
-
- fd = open(argv[1], 0, 0);
- if fd < 0 {
- die("open failed");
- }
-
- f = fopen(fd, &a);
- src = freadall(f, &len);
- fclose(f);
-
- peg = peg_new(argv[1], src, len, &a);
- node = peg_parse(peg, P_sp);
- peg_free(peg);
-
- out = fopen(1, &a);
- peg_show(peg, out, node);
- fputc(out, '\n');
-
- fflush(out);
- fclose(out);
-}
diff --git a/lexer.om b/lexer.om
@@ -1249,3 +1249,203 @@ func lexer_compile_get_tag_name(c: *compiler, pn: *peg_node) {
define_ir_func(c, ic, func_type);
}
+
+func get_tag_table(): *int; // Declared only, no body here; setup_lex stores the result in lex.tag and scans it up to a -1 sentinel.
+func get_link_table(): *int; // Declared only, no body here; setup_lex stores the result in lex.link, which gettok indexes as state * 256 + ch.
+func get_tag_name(tag: int): *byte; // Declared only, no body here; setup_lex stores it in lex.tag_to_str and dolex calls it to print a token's name.
+
+struct lex { // State of one table-driven lexer: tables, input source, input buffer and current-token bookkeeping.
+ a: *alloc; // Allocator used for this struct (setup_lex) and for the input buffer (gettok).
+
+ num_states: int; // Number of entries in `tag` before its -1 sentinel (counted in setup_lex).
+ tag: *int; // Per-state tag: a nonzero entry makes gettok record a match with that tag; the table ends with -1.
+ link: *int; // Transition table indexed by state * 256 + ch; -1 means there is no transition.
+ tag_to_str: (func(t:int):*byte); // Maps a tag to a string; set to get_tag_name by setup_lex.
+
+ fd: int; // File descriptor gettok reads from.
+ eof: int; // Set by gettok when goteof is set and no unconsumed bytes remain (start == fill).
+ goteof: int; // Set by gettok once read() has returned 0.
+
+ buf: *byte; // Input buffer; nil until gettok first allocates it.
+ start: int; // Index in buf where the current token begins.
+ end: int; // Index in buf one past the last recorded match; the next scan starts here.
+ fill: int; // Number of valid bytes currently in buf.
+ cap: int; // Allocated size of buf in bytes.
+
+ match: int; // Tag of the last recorded match in the current scan, or -1 if none.
+ start_lineno: int; // Line number at `start` (counters are initialised to 1).
+ start_colno: int; // Column number at `start`.
+ end_lineno: int; // Line number at `end`.
+ end_colno: int; // Column number at `end`.
+}
+
+func setup_lex(a: *alloc): *lex { // Allocate a lex from `a`, attach the tag/link tables, and return it with no input attached.
+ var l: *lex;
+ var i: int;
+ l = alloc(a, sizeof(*l)) as *lex;
+ l.a = a;
+ l.tag = get_tag_table();
+ l.link = get_link_table();
+ l.tag_to_str = get_tag_name;
+ i = 0;
+ loop { // Count table entries up to the -1 sentinel.
+ if l.tag[i] == -1 {
+ break;
+ }
+ i = i + 1;
+ }
+ l.num_states = i;
+ l.fd = 0;
+ l.eof = 1; // eof/goteof start set, so a gettok before open_lex returns -1 without reading.
+ l.goteof = 1;
+ l.buf = nil; // No buffer yet; gettok allocates it on the first read (cap is 0).
+ l.start = 0;
+ l.end = 0;
+ l.fill = 0;
+ l.cap = 0;
+ l.match = -1;
+ l.start_lineno = 1;
+ l.start_colno = 1;
+ l.end_lineno = 1;
+ l.end_colno = 1;
+ return l;
+}
+
+func open_lex(l: *lex, fd: int) { // Point the lexer at `fd` and reset scanning state; buf and cap are left untouched.
+ l.fd = fd;
+ l.eof = 0; // Clear both end-of-input flags so gettok will call read().
+ l.goteof = 0;
+
+ l.start = 0; // Discard any buffered bytes by resetting all three indices.
+ l.end = 0;
+ l.fill = 0;
+
+ l.match = -1;
+
+ l.start_lineno = 1; // Positions restart at line 1, column 1.
+ l.start_colno = 1;
+
+ l.end_lineno = 1;
+ l.end_colno = 1;
+}
+
+func gettok(l: *lex): int { // Return the tag of the next longest-match token, skipping SPACE and COMMENT; -1 if nothing matched. Token bytes are buf[start..end).
+ var state: int;
+ var ch: int;
+ var tag: int;
+ var ptr: int;
+ var lineno: int;
+ var colno: int;
+ var ret: int;
+ var tmp: *byte;
+
+ loop { // One iteration per scanned token; repeats only when the token is SPACE or COMMENT.
+ ptr = l.end; // Resume where the previous match ended.
+ lineno = l.end_lineno;
+ colno = l.end_colno;
+
+ l.start = ptr;
+ l.start_lineno = lineno;
+ l.start_colno = colno;
+
+ l.match = -1; // No match recorded yet for this token.
+ l.end_lineno = lineno;
+ l.end_colno = colno;
+
+ state = 0; // Scanning always starts in state 0.
+ loop {
+ if ptr == l.fill { // All buffered bytes examined: need more input.
+ if l.goteof {
+ if l.start == l.fill { // eof is flagged only when no unconsumed bytes remain.
+ l.eof = 1;
+ }
+ break;
+ }
+
+ if l.fill - l.start >= (l.cap >> 1) { // Pending bytes fill at least half the buffer (also true when cap is 0): grow it.
+ l.cap = l.cap * 2 + 256;
+ tmp = alloc(l.a, l.cap);
+ memcpy(tmp, &l.buf[l.start], l.fill - l.start); // Keep only the bytes from `start` on, moved to the front.
+ free(l.a, l.buf);
+ l.buf = tmp;
+ ptr = ptr - l.start; // Rebase every index by the discarded prefix.
+ l.end = l.end - l.start;
+ l.fill = l.fill - l.start;
+ l.start = 0;
+ } else if l.fill >= (l.cap >> 1) { // Buffer at least half used but mostly consumed: compact in place.
+ memcpy(l.buf, &l.buf[l.start], l.fill - l.start); // NOTE(review): regions overlap when fill - start > start; assumes memcpy copies forward - confirm.
+ ptr = ptr - l.start;
+ l.end = l.end - l.start;
+ l.fill = l.fill - l.start;
+ l.start = 0;
+ }
+
+ ret = read(l.fd, &l.buf[l.fill], l.cap - l.fill);
+ if ret < 0 {
+ die("read failed");
+ }
+
+ if ret == 0 { // End of input: remember it and stop scanning with whatever match is recorded.
+ l.goteof = 1;
+ break;
+ }
+
+ l.fill = l.fill + ret;
+ }
+
+ ch = l.buf[ptr] as int;
+ ptr = ptr + 1;
+
+ if ch == '\n' { // Track the position after the byte just consumed.
+ lineno = lineno + 1;
+ colno = 1;
+ } else {
+ colno = colno + 1;
+ }
+
+ state = l.link[state * 256 + ch];
+ if state == -1 { // No transition: stop; bytes past l.end are rescanned by the next token.
+ break;
+ }
+
+ tag = l.tag[state];
+ if tag { // Nonzero tag: record this as the longest match so far and keep scanning.
+ l.match = tag;
+ l.end = ptr;
+ l.end_lineno = lineno;
+ l.end_colno = colno;
+ }
+ }
+
+ if l.match == SPACE || l.match == COMMENT { // Skip whitespace and comments inside the lexer.
+ continue;
+ }
+
+ return l.match; // -1 here means end of input or unmatched input; l.eof tells them apart (see dolex).
+ }
+}
+
+func dolex(a: *alloc, name: *byte) { // Lex the file `name` and print one line per token: tag name, a tab, then the token text in single quotes.
+ var l: *lex;
+ var fd: int;
+ var tok: int;
+ l = setup_lex(a);
+ fd = open(name, 0, 0); // NOTE(review): fd is never closed in this function - confirm that is intended.
+ if fd < 0 {
+ die("open failed");
+ }
+ open_lex(l, fd);
+ loop {
+ tok = gettok(l); // SPACE and COMMENT are already skipped inside gettok.
+ if tok == -1 {
+ break;
+ }
+ fputs(nil, l.tag_to_str(tok)); // nil stream - presumably a default output; confirm in the I/O library.
+ fputs(nil, "\t'");
+ fputb(nil, &l.buf[l.start], l.end - l.start); // Token text is buf[start..end).
+ fputs(nil, "'\n");
+ }
+ if !l.eof { // gettok returned -1 without reaching end of input.
+ die("problem");
+ }
+}
diff --git a/lexlib.om b/lexlib.om
@@ -1,197 +0,0 @@
-func get_tag_table(): *int;
-func get_link_table(): *int;
-func get_tag_name(tag: int): *byte;
-
-struct lex {
- a: *alloc;
-
- num_states: int;
- tag: *int;
- link: *int;
-
- fd: int;
- eof: int;
- goteof: int;
-
- buf: *byte;
- start: int;
- end: int;
- fill: int;
- cap: int;
-
- match: int;
- start_lineno: int;
- start_colno: int;
- end_lineno: int;
- end_colno: int;
-}
-
-func setup_lex(a: *alloc): *lex {
- var l: *lex;
- var i: int;
- l = alloc(a, sizeof(*l)) as *lex;
- l.a = a;
- l.tag = get_tag_table();
- l.link = get_link_table();
- i = 0;
- loop {
- if l.tag[i] == -1 {
- break;
- }
- i = i + 1;
- }
- l.num_states = i;
- l.fd = 0;
- l.eof = 1;
- l.goteof = 1;
- l.buf = nil;
- l.start = 0;
- l.end = 0;
- l.fill = 0;
- l.cap = 0;
- l.match = -1;
- l.start_lineno = 1;
- l.start_colno = 1;
- l.end_lineno = 1;
- l.end_colno = 1;
- return l;
-}
-
-func open_lex(l: *lex, fd: int) {
- l.fd = fd;
- l.eof = 0;
- l.goteof = 0;
-
- l.start = 0;
- l.end = 0;
- l.fill = 0;
-
- l.match = -1;
-
- l.start_lineno = 1;
- l.start_colno = 1;
-
- l.end_lineno = 1;
- l.end_colno = 1;
-}
-
-func gettok(l: *lex): int {
- var state: int;
- var ch: int;
- var tag: int;
- var ptr: int;
- var lineno: int;
- var colno: int;
- var ret: int;
- var tmp: *byte;
-
- ptr = l.end;
- lineno = l.end_lineno;
- colno = l.end_colno;
-
- l.start = ptr;
- l.start_lineno = lineno;
- l.start_colno = colno;
-
- l.match = -1;
- l.end_lineno = lineno;
- l.end_colno = colno;
-
- state = 0;
- loop {
- if ptr == l.fill {
- if l.goteof {
- if l.start == l.fill {
- l.eof = 1;
- }
- break;
- }
-
- if l.fill - l.start >= (l.cap >> 1) {
- l.cap = l.cap * 2 + 256;
- tmp = alloc(l.a, l.cap);
- memcpy(tmp, &l.buf[l.start], l.fill - l.start);
- free(l.a, l.buf);
- l.buf = tmp;
- ptr = ptr - l.start;
- l.end = l.end - l.start;
- l.fill = l.fill - l.start;
- l.start = 0;
- } else if l.fill >= (l.cap >> 1) {
- memcpy(l.buf, &l.buf[l.start], l.fill - l.start);
- ptr = ptr - l.start;
- l.end = l.end - l.start;
- l.fill = l.fill - l.start;
- l.start = 0;
- }
-
- ret = read(l.fd, &l.buf[l.fill], l.cap - l.fill);
- if ret < 0 {
- die("read failed");
- }
-
- if ret == 0 {
- l.goteof = 1;
- break;
- }
-
- l.fill = l.fill + ret;
- }
-
- ch = l.buf[ptr] as int;
- ptr = ptr + 1;
-
- if ch == '\n' {
- lineno = lineno + 1;
- colno = 1;
- } else {
- colno = colno + 1;
- }
-
- state = l.link[state * 256 + ch];
- if state == -1 {
- break;
- }
-
- tag = l.tag[state];
- if tag {
- l.match = tag;
- l.end = ptr;
- l.end_lineno = lineno;
- l.end_colno = colno;
- }
- }
-
- return l.match;
-}
-
-func dolex(a: *alloc, name: *byte) {
- var l: *lex;
- var fd: int;
- var tok: int;
- l = setup_lex(a);
- fd = open(name, 0, 0);
- if fd < 0 {
- die("open failed");
- }
- open_lex(l, fd);
- loop {
- tok = gettok(l);
- if tok == -1 {
- break;
- }
- if tok == SPACE {
- continue;
- }
- if tok == COMMENT {
- continue;
- }
- fputs(nil, get_tag_name(tok));
- fputs(nil, "\t'");
- fputb(nil, &l.buf[l.start], l.end - l.start);
- fputs(nil, "'\n");
- }
- if !l.eof {
- die("problem");
- }
-}