diff options
author | kdx <kdx@42l.fr> | 2023-03-14 02:41:02 +0100 |
---|---|---|
committer | kdx <kdx@42l.fr> | 2023-03-14 02:41:02 +0100 |
commit | f76ff3d3300760efead590bef95f0770bf2860af (patch) | |
tree | e55a1d2cc7d74aa69ca38bb80be23ea59e7b3e30 | |
parent | 7b888be4de737b652b3b4ab78229482fcb77bd03 (diff) | |
download | golem-f76ff3d3300760efead590bef95f0770bf2860af.tar.gz |
complete lexer
-rw-r--r-- | README.md | 12 | ||||
-rw-r--r-- | lexer.c | 49 | ||||
-rw-r--r-- | mvp.golem | 12 | ||||
-rw-r--r-- | token.c | 2 | ||||
-rw-r--r-- | token.h | 22 |
5 files changed, 70 insertions, 27 deletions
@@ -30,7 +30,7 @@ alloc(size) { local ptr; ptr = g_heap; - g_heap = (+ g_heap size); + g_heap = (add g_heap size); return heap; } @@ -39,12 +39,12 @@ strdup(str) { local dest; local i; - dest = (alloc (+ [str 0] 1)); + dest = (alloc (add [str 0] 1)); i = 0; - while (<= i [str 0]) { + while (lesseq i [str 0]) { [dest i] = [str i]; - i = (+ i 1); + i = (add i 1); } return dest; @@ -54,9 +54,9 @@ print(str) { local i; i = 1; - while (<= i [str 0]) { + while (lesseq i [str 0]) { (putchar [str i]); - i = i + 1; + i = (add i 1); } return 0; @@ -5,23 +5,35 @@ #include <stdio.h> #include <stdlib.h> +static int +iswordy(int c) +{ + if (isalpha(c) || c == '_') + return 1; + return 0; +} + static unsigned int simple_token(char c) { switch (c) { - case '(': return TOK_OPEN_PAREN; - case ')': return TOK_CLOS_PAREN; case '{': return TOK_OPEN_CURL; case '}': return TOK_CLOS_CURL; + case '(': return TOK_OPEN_PAREN; + case ')': return TOK_CLOS_PAREN; + case '[': return TOK_OPEN_SQUAR; + case ']': return TOK_CLOS_SQUAR; + case '=': return TOK_ASSIGN; + case ';': return TOK_END; default: return TOK_NONE; } } static Slice -slice_number(Slice slice) +slice_match(Slice slice, int (*isfun)(int c)) { for (int i = slice.begin; i < slice.end; i++) - if (!isdigit(slice.str[i])) + if (!isfun(slice.str[i])) return slice_sub(slice, slice.begin, i); return slice; } @@ -57,12 +69,39 @@ lexer(Slice slice) // Number token. if (isdigit(slice.str[i])) { Slice number; - number = slice_number(slice_sub(slice, i, slice.end)); + number = slice_match(slice_sub(slice, i, slice.end), + isdigit); token_append(&toks, token_create(number, TOK_NUMBER)); i = number.end; continue; } + // Word token. + if (iswordy(slice.str[i])) { + Slice word = slice_match(slice_sub(slice, i, slice.end), + iswordy); + token_append(&toks, token_create(word, TOK_NUMBER)); + i = word.end; + continue; + } + + // String token. + if (slice.str[i] == '"') { + int close = i + 1; + while (close < slice.end && slice.str[close] != '"') + close += 1; + if (slice.str[close] != '"') { + // XXX: Streamline this kind of error handling. + fprintf(stderr, "unclosed string\n"); + token_destroy(toks); + return NULL; + } + Slice string = slice_sub(slice, i + 1, close); + token_append(&toks, token_create(string, TOK_STRING)); + i = close + 1; + continue; + } + printf("skipping '%c'\n", slice.str[i]); i += 1; } @@ -23,7 +23,7 @@ alloc(size) { local ptr; ptr = g_heap; - g_heap = (+ g_heap size); + g_heap = (add g_heap size); return heap; } @@ -32,12 +32,12 @@ strdup(str) { local dest; local i; - dest = (alloc (+ [str 0] 1)); + dest = (alloc (add [str 0] 1)); i = 0; - while (<= i [str 0]) { + while (lesseq i [str 0]) { [dest i] = [str i]; - i = (+ i 1); + i = (add i 1); } return dest; @@ -47,9 +47,9 @@ print(str) { local i; i = 1; - while (<= i [str 0]) { + while (lesseq i [str 0]) { (putchar [str i]); - i = i + 1; + i = (add i 1); } return 0; @@ -47,7 +47,7 @@ token_print(Token *token) { Token *tok = token; while (token != NULL) { - printf("type: %2u slice: ", token->type); + printf("type: %c slice: ", (char)token->type); slice_print(token->slice); token = token->next; } @@ -2,15 +2,19 @@ #include "slice.h" enum { - TOK_NONE, - TOK_OPEN_CURL, - TOK_CLOS_CURL, - TOK_OPEN_PAREN, - TOK_CLOS_PAREN, - TOK_NUMBER, - TOK_CHARACTER, - TOK_STRING, - TOK_WORD, + TOK_NONE = 0, + TOK_OPEN_CURL = '{', + TOK_CLOS_CURL = '}', + TOK_OPEN_PAREN = '(', + TOK_CLOS_PAREN = ')', + TOK_OPEN_SQUAR = '[', + TOK_CLOS_SQUAR = ']', + TOK_ASSIGN = '=', + TOK_END = ';', + TOK_NUMBER = '0', + TOK_CHARACTER = 'a', + TOK_STRING = 's', + TOK_WORD = 'w', }; typedef struct Token Token; |