summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkdx <kdx@42l.fr>2023-03-14 02:41:02 +0100
committerkdx <kdx@42l.fr>2023-03-14 02:41:02 +0100
commitf76ff3d3300760efead590bef95f0770bf2860af (patch)
treee55a1d2cc7d74aa69ca38bb80be23ea59e7b3e30
parent7b888be4de737b652b3b4ab78229482fcb77bd03 (diff)
downloadgolem-f76ff3d3300760efead590bef95f0770bf2860af.tar.gz
complete lexer
-rw-r--r--README.md12
-rw-r--r--lexer.c49
-rw-r--r--mvp.golem12
-rw-r--r--token.c2
-rw-r--r--token.h22
5 files changed, 70 insertions, 27 deletions
diff --git a/README.md b/README.md
index a49ddce..a9eaa1c 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ alloc(size) {
local ptr;
ptr = g_heap;
- g_heap = (+ g_heap size);
+ g_heap = (add g_heap size);
return heap;
}
@@ -39,12 +39,12 @@ strdup(str) {
local dest;
local i;
- dest = (alloc (+ [str 0] 1));
+ dest = (alloc (add [str 0] 1));
i = 0;
- while (<= i [str 0]) {
+ while (lesseq i [str 0]) {
[dest i] = [str i];
- i = (+ i 1);
+ i = (add i 1);
}
return dest;
@@ -54,9 +54,9 @@ print(str) {
local i;
i = 1;
- while (<= i [str 0]) {
+ while (lesseq i [str 0]) {
(putchar [str i]);
- i = i + 1;
+ i = (add i 1);
}
return 0;
diff --git a/lexer.c b/lexer.c
index d1cba7f..803949e 100644
--- a/lexer.c
+++ b/lexer.c
@@ -5,23 +5,35 @@
#include <stdio.h>
#include <stdlib.h>
+/* Return 1 if c may appear in a word (an alphabetic character or '_'), else 0. */
+static int
+iswordy(int c)
+{
+ return (isalpha(c) || c == '_') ? 1 : 0;
+}
+
+/* Map a single-character token to its token type; any other character yields TOK_NONE. */
static unsigned int
simple_token(char c)
{
switch (c) {
- case '(': return TOK_OPEN_PAREN;
- case ')': return TOK_CLOS_PAREN;
case '{': return TOK_OPEN_CURL;
case '}': return TOK_CLOS_CURL;
+ case '(': return TOK_OPEN_PAREN;
+ case ')': return TOK_CLOS_PAREN;
+ case '[': return TOK_OPEN_SQUAR;
+ case ']': return TOK_CLOS_SQUAR;
+ case '=': return TOK_ASSIGN;
+ case ';': return TOK_END;
default: return TOK_NONE;
}
}
+/*
+ * Scan slice from slice.begin and stop at the first character that does not
+ * satisfy isfun, returning slice_sub(slice, slice.begin, i) for that index i.
+ * When every character in [slice.begin, slice.end) matches, slice itself is
+ * returned unchanged.
+ */
static Slice
-slice_number(Slice slice)
+slice_match(Slice slice, int (*isfun)(int c))
{
for (int i = slice.begin; i < slice.end; i++)
- if (!isdigit(slice.str[i]))
+ /* <ctype.h> predicates are undefined for negative values other than
+ * EOF, so widen the character through unsigned char first. */
+ if (!isfun((unsigned char)slice.str[i]))
return slice_sub(slice, slice.begin, i);
return slice;
}
@@ -57,12 +69,39 @@ lexer(Slice slice)
// Number token.
if (isdigit(slice.str[i])) {
Slice number;
- number = slice_number(slice_sub(slice, i, slice.end));
+ number = slice_match(slice_sub(slice, i, slice.end),
+ isdigit);
token_append(&toks, token_create(number, TOK_NUMBER));
i = number.end;
continue;
}
+ // Word token: a run of letters and underscores starting at i.
+ if (iswordy(slice.str[i])) {
+ Slice word = slice_match(slice_sub(slice, i, slice.end),
+ iswordy);
+ // Tag as TOK_WORD (not TOK_NUMBER) so words and numbers stay
+ // distinguishable downstream.
+ token_append(&toks, token_create(word, TOK_WORD));
+ i = word.end;
+ continue;
+ }
+
+ // String token: the text between two double quotes. The scan stops at
+ // the first '"' it meets, so no escape sequences are recognised.
+ if (slice.str[i] == '"') {
+ int close = i + 1;
+ while (close < slice.end && slice.str[close] != '"')
+ close += 1;
+ // Reaching slice.end means no closing quote was found. Test the
+ // index instead of reading slice.str[slice.end], which lies
+ // outside the slice.
+ if (close >= slice.end) {
+ // XXX: Streamline this kind of error handling.
+ fprintf(stderr, "unclosed string\n");
+ token_destroy(toks);
+ return NULL;
+ }
+ // The token covers the characters between the quotes only.
+ Slice string = slice_sub(slice, i + 1, close);
+ token_append(&toks, token_create(string, TOK_STRING));
+ i = close + 1;
+ continue;
+ }
+
printf("skipping '%c'\n", slice.str[i]);
i += 1;
}
diff --git a/mvp.golem b/mvp.golem
index 90e1b15..9877673 100644
--- a/mvp.golem
+++ b/mvp.golem
@@ -23,7 +23,7 @@ alloc(size) {
local ptr;
ptr = g_heap;
- g_heap = (+ g_heap size);
+ g_heap = (add g_heap size);
return heap;
}
@@ -32,12 +32,12 @@ strdup(str) {
local dest;
local i;
- dest = (alloc (+ [str 0] 1));
+ dest = (alloc (add [str 0] 1));
i = 0;
- while (<= i [str 0]) {
+ while (lesseq i [str 0]) {
[dest i] = [str i];
- i = (+ i 1);
+ i = (add i 1);
}
return dest;
@@ -47,9 +47,9 @@ print(str) {
local i;
i = 1;
- while (<= i [str 0]) {
+ while (lesseq i [str 0]) {
(putchar [str i]);
- i = i + 1;
+ i = (add i 1);
}
return 0;
diff --git a/token.c b/token.c
index 22facec..51aaab3 100644
--- a/token.c
+++ b/token.c
@@ -47,7 +47,7 @@ token_print(Token *token)
{
Token *tok = token;
while (token != NULL) {
- printf("type: %2u slice: ", token->type);
+ printf("type: %c slice: ", (char)token->type);
slice_print(token->slice);
token = token->next;
}
diff --git a/token.h b/token.h
index a46593d..9c6de47 100644
--- a/token.h
+++ b/token.h
@@ -2,15 +2,19 @@
#include "slice.h"
+/*
+ * Token types. Each value other than TOK_NONE is a printable character so
+ * that token_print() can show the type with "%c": punctuation tokens use
+ * the character they represent, the remaining kinds use a mnemonic
+ * character.
+ */
enum {
- TOK_NONE,
- TOK_OPEN_CURL,
- TOK_CLOS_CURL,
- TOK_OPEN_PAREN,
- TOK_CLOS_PAREN,
- TOK_NUMBER,
- TOK_CHARACTER,
- TOK_STRING,
- TOK_WORD,
+ TOK_NONE = 0,
+ TOK_OPEN_CURL = '{',
+ TOK_CLOS_CURL = '}',
+ TOK_OPEN_PAREN = '(',
+ TOK_CLOS_PAREN = ')',
+ TOK_OPEN_SQUAR = '[',
+ TOK_CLOS_SQUAR = ']',
+ TOK_ASSIGN = '=',
+ TOK_END = ';',
+ TOK_NUMBER = '0',
+ TOK_CHARACTER = 'a',
+ TOK_STRING = 's',
+ TOK_WORD = 'w',
};
typedef struct Token Token;