diff options
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 142 |
1 files changed, 142 insertions, 0 deletions
@@ -0,0 +1,142 @@ +#include "lexer.h" +#include <ctype.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +static Token *resize_toks(Token *toks, size_t *size) +{ + *size += 128; + Token *const new_toks = realloc(toks, sizeof(Token) * (*size + 1)); + if (new_toks == NULL) { + perror("resize_toks"); + lexer_free(toks); + return NULL; + } + return new_toks; +} + +static unsigned int one_wide_tok(const char *s) +{ + switch (*s) { + case '(': return TOK_PAREN_OPEN; + case ')': return TOK_PAREN_CLOS; + case '{': return TOK_CURL_OPEN; + case '}': return TOK_CURL_CLOS; + case '[': return TOK_SQUAR_OPEN; + case ']': return TOK_SQUAR_CLOS; + case ';': return TOK_SEMICOLON; + case '=': return TOK_ASSIGN; + case ',': return TOK_COMMA; + case '<': return TOK_COMP_LESS; + case '%': return TOK_MODULO; + default: return TOK_NONE; + } +} + +#define PAIR(a, b) ((int)(a) + (int)(b) * 256) +static unsigned int two_wide_tok(const char *s) +{ + switch (PAIR(s[0], s[1])) { + case PAIR('+', '+'): return TOK_INCREMENT; + default: return TOK_NONE; + } +} + +Token *lexer(const char *s) +{ + size_t size = 128; + Token *toks = calloc(size + 1, sizeof(Token)); + size_t tok_i = 0; + size_t line = 1; + if (toks == NULL) { + perror("lexer"); + return NULL; + } + while (*s != '\0') { + /* Skip whitespaces. */ + while (isspace(*s)) { + if (*s == '\n') + line += 1; + s += 1; + } + if (*s == '\0') + break; + toks[tok_i].line = line; + if (*s == '"') { + const char *end = strchr(s + 1, '"'); + if (end == NULL) { + printf("unclosed string\n"); + lexer_free(toks); + return NULL; + } + size_t len = end - s - 1; + toks[tok_i].v.s = calloc(1, len + 1); + if (toks[tok_i].v.s == NULL) { + perror("lexer"); + lexer_free(toks); + return NULL; + } + toks[tok_i].type = TOK_STRING; + strncpy(toks[tok_i].v.s, s + 1, len); + tok_i += 1; + s = end + 1; + } else if (isalpha(*s) || *s == '_') { + size_t len = 0; + while (isalnum(s[len]) || s[len] == '_') + len += 1; + toks[tok_i].v.s = calloc(1, len + 1); + if (toks[tok_i].v.s == NULL) { + perror("lexer"); + lexer_free(toks); + return NULL; + } + strncpy(toks[tok_i].v.s, s, len); + toks[tok_i].type = TOK_KEYWORD; + tok_i += 1; + s += len; + } else if (isdigit(*s)) { + size_t len = 0; + while (isdigit(s[len])) + len += 1; + toks[tok_i].v.i = atoi(s); + toks[tok_i].type = TOK_INTEGER; + tok_i += 1; + s += len; + } else if (two_wide_tok(s) != TOK_NONE) { + toks[tok_i].type = two_wide_tok(s); + toks[tok_i].v.c = *s; + tok_i += 1; + s += 2; + } else if (one_wide_tok(s) != TOK_NONE) { + toks[tok_i].type = one_wide_tok(s); + toks[tok_i].v.c = *s; + tok_i += 1; + s += 1; + } else { + printf("wtf is this shit? %c\n", *s); + s += 1; + } + if (tok_i == size) { + toks = resize_toks(toks, &size); + if (toks == NULL) + return NULL; + } + } + return toks; +} + +void lexer_free(Token *toks) +{ + if (toks != NULL) { + for (Token *tok = toks; tok->type != TOK_NONE; tok += 1) + token_free(tok); + free(toks); + } +} + +void lexer_print(const Token *toks) +{ + for (const Token *tok = toks; tok->type != TOK_NONE; tok += 1) + token_print(tok); +} |