From 3ab936fdda8bd218560c0839229be05cd8c4673e Mon Sep 17 00:00:00 2001 From: kdx Date: Fri, 20 Jan 2023 05:03:22 +0100 Subject: precise token position --- Token.c | 35 ++++++++++++++++++++++++++++++++--- Token.h | 8 +++++++- lexer.c | 11 +++++++++-- main.c | 6 ++++++ parse.c | 21 +++++++++++++++++++++ parse.h | 4 ++++ 6 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 parse.c create mode 100644 parse.h diff --git a/Token.c b/Token.c index 84eae84..85d65cf 100644 --- a/Token.c +++ b/Token.c @@ -5,7 +5,7 @@ void token_free(Token *tok) { switch (tok->type) { - case TOK_KEYWORD: + case TOK_WORD: if (tok != NULL && tok->v.s != NULL) free(tok->v.s); default: @@ -15,9 +15,9 @@ void token_free(Token *tok) void token_print(const Token *tok) { - printf("%d ", tok->line); + printf("%u:%u\t", tok->line, tok->column); switch (tok->type) { - case TOK_KEYWORD: printf("TOK_KEYWORD %s\n", tok->v.s); break; + case TOK_WORD: printf("TOK_WORD %s\n", tok->v.s); break; case TOK_STRING: printf("TOK_STRING \"%s\"\n", tok->v.s); break; case TOK_INTEGER: printf("TOK_INTEGER %d\n", tok->v.i); break; case TOK_PAREN_OPEN: printf("TOK_PAREN_OPEN\n"); break; @@ -39,3 +39,32 @@ void token_print(const Token *tok) default: printf("token_print TODO: type %d\n", tok->type); break; } } + +const char *token_type_str(unsigned int type) +{ + switch (type) { + case TOK_WORD: return "TOK_WORD"; + case TOK_STRING: return "TOK_STRING"; + case TOK_INTEGER: return "TOK_INTEGER"; + case TOK_PAREN_OPEN: return "TOK_PAREN_OPEN"; + case TOK_PAREN_CLOS: return "TOK_PAREN_CLOS"; + case TOK_CURL_OPEN: return "TOK_CURL_OPEN"; + case TOK_CURL_CLOS: return "TOK_CURL_CLOS"; + case TOK_SQUAR_OPEN: return "TOK_SQUAR_OPEN"; + case TOK_SQUAR_CLOS: return "TOK_SQUAR_CLOS"; + case TOK_COLON: return "TOK_COLON"; + case TOK_SEMICOLON: return "TOK_SEMICOLON"; + case TOK_ASSIGN: return "TOK_ASSIGN"; + case TOK_COMMA: return "TOK_COMMA"; + case TOK_COMP_LESS: return "TOK_COMP_LESS"; + case TOK_MODULO: return "TOK_MODULO"; + case TOK_STAR: return "TOK_STAR"; + case TOK_INCREMENT: return "TOK_INCREMENT"; + case TOK_COMP_EQ: return "TOK_COMP_EQ"; + case TOK_COMP_NEQ: return "TOK_COMP_NEQ"; + case TOK_KW_CONST: return "TOK_KW_CONST"; + case TOK_KW_VAR: return "TOK_KW_VAR"; + case TOK_KW_FN: return "TOK_KW_FN"; + default: return "TOK_?"; + } +} diff --git a/Token.h b/Token.h index 1478199..1ff528a 100644 --- a/Token.h +++ b/Token.h @@ -3,7 +3,7 @@ enum { TOK_NONE, - TOK_KEYWORD, + TOK_WORD, TOK_STRING, TOK_INTEGER, /* single char toks */ @@ -24,6 +24,10 @@ enum { TOK_INCREMENT, TOK_COMP_EQ, TOK_COMP_NEQ, + /* keywords */ + TOK_KW_VAR, + TOK_KW_CONST, + TOK_KW_FN, }; union TokenValue { @@ -37,8 +41,10 @@ union TokenValue { typedef struct Token { unsigned int type; unsigned int line; + unsigned int column; union TokenValue v; } Token; void token_free(Token *tok); void token_print(const Token *tok); +const char *token_type_str(const unsigned int type); diff --git a/lexer.c b/lexer.c index 3ad2109..c42ac3b 100644 --- a/lexer.c +++ b/lexer.c @@ -52,6 +52,7 @@ Token *lexer(const char *s) size_t size = 128; Token *toks = calloc(size + 1, sizeof(Token)); size_t tok_i = 0; + size_t column = 1; size_t line = 1; if (toks == NULL) { perror("lexer"); @@ -60,13 +61,18 @@ Token *lexer(const char *s) while (*s != '\0') { /* Skip whitespaces. */ while (isspace(*s)) { - if (*s == '\n') + if (*s == '\n') { + column = 1; line += 1; + } else + column += 1; s += 1; } if (*s == '\0') break; + toks[tok_i].column = column; toks[tok_i].line = line; + const char *rem_s = s; if (*s == '"') { const char *end = strchr(s + 1, '"'); if (end == NULL) { @@ -96,7 +102,7 @@ Token *lexer(const char *s) return NULL; } strncpy(toks[tok_i].v.s, s, len); - toks[tok_i].type = TOK_KEYWORD; + toks[tok_i].type = TOK_WORD; tok_i += 1; s += len; } else if (isdigit(*s)) { @@ -126,6 +132,7 @@ Token *lexer(const char *s) if (toks == NULL) return NULL; } + column += s - rem_s; } return toks; } diff --git a/main.c b/main.c index a1f9439..8cca483 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,7 @@ +#include "Token.h" #include "drain.h" #include "lexer.h" +#include "parse.h" #include #include @@ -27,6 +29,10 @@ int main(int argc, char **argv) return 1; } lexer_print(toks); + if (parse(toks)) { + fprintf(stderr, "parser failed\n"); + return 1; + } lexer_free(toks); return 0; } diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..b1b5c93 --- /dev/null +++ b/parse.c @@ -0,0 +1,21 @@ +#include "parse.h" +#include + +int parse(const Token *tok) +{ + while (tok->type != TOK_NONE) { + switch (tok->type) { + case TOK_KW_FN: + case TOK_KW_VAR: + case TOK_KW_CONST: + break; + default: + fprintf(stderr, "unexpected %s at %u:%u\n", + token_type_str(tok->type), + tok->line, tok->column); + return 1; + } + tok += 1; + } + return 0; +} diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..413a791 --- /dev/null +++ b/parse.h @@ -0,0 +1,4 @@ +#pragma once +#include "Token.h" + +int parse(const Token *toks); -- cgit v1.2.3