summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkdx <kdx@42l.fr>2023-01-20 05:03:22 +0100
committerkdx <kdx@42l.fr>2023-01-20 05:03:22 +0100
commit3ab936fdda8bd218560c0839229be05cd8c4673e (patch)
treef56f26e3d8fffd010c889677aecf0baa5af35590
parent072f0d4ea7d1da479baea85fadead5f4d640eff2 (diff)
downloadgolem-3ab936fdda8bd218560c0839229be05cd8c4673e.tar.gz
precise token position
-rw-r--r--Token.c35
-rw-r--r--Token.h8
-rw-r--r--lexer.c11
-rw-r--r--main.c6
-rw-r--r--parse.c21
-rw-r--r--parse.h4
6 files changed, 79 insertions, 6 deletions
diff --git a/Token.c b/Token.c
index 84eae84..85d65cf 100644
--- a/Token.c
+++ b/Token.c
@@ -5,7 +5,7 @@
void token_free(Token *tok)
{
switch (tok->type) {
- case TOK_KEYWORD:
+ case TOK_WORD:
if (tok != NULL && tok->v.s != NULL)
free(tok->v.s);
default:
@@ -15,9 +15,9 @@ void token_free(Token *tok)
void token_print(const Token *tok)
{
- printf("%d ", tok->line);
+ printf("%u:%u\t", tok->line, tok->column);
switch (tok->type) {
- case TOK_KEYWORD: printf("TOK_KEYWORD %s\n", tok->v.s); break;
+ case TOK_WORD: printf("TOK_WORD %s\n", tok->v.s); break;
case TOK_STRING: printf("TOK_STRING \"%s\"\n", tok->v.s); break;
case TOK_INTEGER: printf("TOK_INTEGER %d\n", tok->v.i); break;
case TOK_PAREN_OPEN: printf("TOK_PAREN_OPEN\n"); break;
@@ -39,3 +39,32 @@ void token_print(const Token *tok)
default: printf("token_print TODO: type %d\n", tok->type); break;
}
}
+
+const char *token_type_str(unsigned int type)
+{
+ switch (type) {
+ case TOK_WORD: return "TOK_WORD";
+ case TOK_STRING: return "TOK_STRING";
+ case TOK_INTEGER: return "TOK_INTEGER";
+ case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN";
+ case TOK_PAREN_CLOS: return "TOK_PAREN_CLOS";
+ case TOK_CURL_OPEN: return "TOK_CURL_OPEN";
+ case TOK_CURL_CLOS: return "TOK_CURL_CLOS";
+ case TOK_SQUAR_OPEN: return "TOK_SQUAR_OPEN";
+ case TOK_SQUAR_CLOS: return "TOK_SQUAR_CLOS";
+ case TOK_COLON: return "TOK_COLON";
+ case TOK_SEMICOLON: return "TOK_SEMICOLON";
+ case TOK_ASSIGN: return "TOK_ASSIGN";
+ case TOK_COMMA: return "TOK_COMMA";
+ case TOK_COMP_LESS: return "TOK_COMP_LESS";
+ case TOK_MODULO: return "TOK_MODULO";
+ case TOK_STAR: return "TOK_STAR";
+ case TOK_INCREMENT: return "TOK_INCREMENT";
+ case TOK_COMP_EQ: return "TOK_COMP_EQ";
+ case TOK_COMP_NEQ: return "TOK_COMP_NEQ";
+ case TOK_KW_CONST: return "TOK_KW_CONST";
+ case TOK_KW_VAR: return "TOK_KW_VAR";
+ case TOK_KW_FN: return "TOK_KW_FN";
+ default: return "TOK_?";
+ }
+}
diff --git a/Token.h b/Token.h
index 1478199..1ff528a 100644
--- a/Token.h
+++ b/Token.h
@@ -3,7 +3,7 @@
enum {
TOK_NONE,
- TOK_KEYWORD,
+ TOK_WORD,
TOK_STRING,
TOK_INTEGER,
/* single char toks */
@@ -24,6 +24,10 @@ enum {
TOK_INCREMENT,
TOK_COMP_EQ,
TOK_COMP_NEQ,
+ /* keywords */
+ TOK_KW_VAR,
+ TOK_KW_CONST,
+ TOK_KW_FN,
};
union TokenValue {
@@ -37,8 +41,10 @@ union TokenValue {
typedef struct Token {
unsigned int type;
unsigned int line;
+ unsigned int column;
union TokenValue v;
} Token;
void token_free(Token *tok);
void token_print(const Token *tok);
+const char *token_type_str(const unsigned int type);
diff --git a/lexer.c b/lexer.c
index 3ad2109..c42ac3b 100644
--- a/lexer.c
+++ b/lexer.c
@@ -52,6 +52,7 @@ Token *lexer(const char *s)
size_t size = 128;
Token *toks = calloc(size + 1, sizeof(Token));
size_t tok_i = 0;
+ size_t column = 1;
size_t line = 1;
if (toks == NULL) {
perror("lexer");
@@ -60,13 +61,18 @@ Token *lexer(const char *s)
while (*s != '\0') {
/* Skip whitespaces. */
while (isspace(*s)) {
- if (*s == '\n')
+ if (*s == '\n') {
+ column = 1;
line += 1;
+ } else
+ column += 1;
s += 1;
}
if (*s == '\0')
break;
+ toks[tok_i].column = column;
toks[tok_i].line = line;
+ const char *rem_s = s;
if (*s == '"') {
const char *end = strchr(s + 1, '"');
if (end == NULL) {
@@ -96,7 +102,7 @@ Token *lexer(const char *s)
return NULL;
}
strncpy(toks[tok_i].v.s, s, len);
- toks[tok_i].type = TOK_KEYWORD;
+ toks[tok_i].type = TOK_WORD;
tok_i += 1;
s += len;
} else if (isdigit(*s)) {
@@ -126,6 +132,7 @@ Token *lexer(const char *s)
if (toks == NULL)
return NULL;
}
+ column += s - rem_s;
}
return toks;
}
diff --git a/main.c b/main.c
index a1f9439..8cca483 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,7 @@
+#include "Token.h"
#include "drain.h"
#include "lexer.h"
+#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
@@ -27,6 +29,10 @@ int main(int argc, char **argv)
return 1;
}
lexer_print(toks);
+ if (parse(toks)) {
+ fprintf(stderr, "parser failed\n");
+ return 1;
+ }
lexer_free(toks);
return 0;
}
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..b1b5c93
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,21 @@
+#include "parse.h"
+#include <stdio.h>
+
+int parse(const Token *tok)
+{
+ while (tok->type != TOK_NONE) {
+ switch (tok->type) {
+ case TOK_KW_FN:
+ case TOK_KW_VAR:
+ case TOK_KW_CONST:
+ break;
+ default:
+ fprintf(stderr, "unexpected %s at %u:%u\n",
+ token_type_str(tok->type),
+ tok->line, tok->column);
+ return 1;
+ }
+ tok += 1;
+ }
+ return 0;
+}
diff --git a/parse.h b/parse.h
new file mode 100644
index 0000000..413a791
--- /dev/null
+++ b/parse.h
@@ -0,0 +1,4 @@
+#pragma once
+#include "Token.h"
+
+int parse(const Token *toks);