#include "lexer.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of token slots allocated up front and added on every resize. */
enum { TOKS_CHUNK = 128 };

/*
 * Grow the token array by TOKS_CHUNK slots (plus one terminator slot).
 *
 * toks: array previously obtained from calloc()/resize_toks().
 * size: in/out; usable slot count, updated only on success.
 *
 * Returns the (possibly moved) array, or NULL on allocation failure, in which
 * case the old array has already been released with lexer_free().
 */
static Token *resize_toks(Token *toks, size_t *size)
{
    const size_t old_size = *size;
    const size_t new_size = old_size + TOKS_CHUNK;
    Token *const new_toks = realloc(toks, sizeof(Token) * (new_size + 1));

    if (new_toks == NULL) {
        perror("resize_toks");
        lexer_free(toks);
        return NULL;
    }

    /*
     * realloc() leaves the added bytes indeterminate.  Zero them so the new
     * slots look exactly like the calloc()'d ones: lexer_free() and
     * lexer_print() stop at the first token whose type is TOK_NONE.
     * NOTE(review): like the initial calloc(), this assumes TOK_NONE == 0.
     */
    memset(new_toks + old_size, 0, sizeof(Token) * (new_size + 1 - old_size));

    *size = new_size;
    return new_toks;
}

/*
 * Classify the single character at *s.
 * Returns the matching TOK_* value, or TOK_NONE if it is not a known
 * one-character token.
 */
static unsigned int one_wide_tok(const char *s)
{
    switch (*s) {
    case '(': return TOK_PAREN_OPEN;
    case ')': return TOK_PAREN_CLOS;
    case '{': return TOK_CURL_OPEN;
    case '}': return TOK_CURL_CLOS;
    case '[': return TOK_SQUAR_OPEN;
    case ']': return TOK_SQUAR_CLOS;
    case ':': return TOK_COLON;
    case ';': return TOK_SEMICOLON;
    case '=': return TOK_ASSIGN;
    case ',': return TOK_COMMA;
    case '<': return TOK_COMP_LESS;
    case '>': return TOK_COMP_MORE;
    case '+': return TOK_ADD;
    case '-': return TOK_SUB;
    case '*': return TOK_MUL;
    case '/': return TOK_DIV;
    case '%': return TOK_MOD;
    case '&': return TOK_AND;
    case '|': return TOK_OR;
    case '^': return TOK_XOR;
    case '$': return TOK_REF;
    case '!': return TOK_NOT;
    default:  return TOK_NONE;
    }
}

/* Pack two characters into one int so a pair can be used as a case label. */
#define PAIR(a, b) ((int)(unsigned char)(a) + (int)(unsigned char)(b) * 256)

/*
 * Classify the two characters s[0], s[1].
 * The caller must guarantee s[0] != '\0' so that reading s[1] stays inside
 * the string (it is at worst the terminator).
 * Returns the matching TOK_* value (TOK_COMMENT for "//"), or TOK_NONE.
 */
static unsigned int two_wide_tok(const char *s)
{
    switch (PAIR(s[0], s[1])) {
    case PAIR('=', '='): return TOK_COMP_EQ;
    case PAIR('!', '='): return TOK_COMP_NEQ;
    case PAIR('/', '/'): return TOK_COMMENT;
    case PAIR('<', '='): return TOK_COMP_LESSEQ;
    case PAIR('>', '='): return TOK_COMP_MOREEQ;
    case PAIR('|', '|'): return TOK_COMP_OR;
    case PAIR('&', '&'): return TOK_COMP_AND;
    default:             return TOK_NONE;
    }
}

/* Map an identifier to its keyword token type, or TOK_WORD if it is none. */
static unsigned int word_type(const char *word)
{
    static const struct {
        const char *text;
        unsigned int type;
    } keywords[] = {
        { "fn",    TOK_KW_FN },
        { "var",   TOK_KW_VAR },
        { "const", TOK_KW_CONST },
        { "if",    TOK_KW_IF },
        { "else",  TOK_KW_ELSE },
        { "while", TOK_KW_WHILE },
    };

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(keywords[i].text, word) == 0) {
            return keywords[i].type;
        }
    }
    return TOK_WORD;
}

/* Return a freshly allocated NUL-terminated copy of start[0..len), or NULL. */
static char *copy_span(const char *start, size_t len)
{
    char *copy = calloc(1, len + 1);

    if (copy != NULL) {
        memcpy(copy, start, len);
    }
    return copy;
}

/*
 * Split the NUL-terminated source text s into tokens.
 *
 * Recognises double-quoted strings (no escape sequences), identifiers and
 * keywords, decimal integers, "//" line comments (skipped), and the one- and
 * two-character operators known to one_wide_tok()/two_wide_tok().  Each token
 * records the 1-based line and column where it starts.  Unknown characters
 * are reported on stdout and skipped.
 *
 * Returns a heap array terminated by a token of type TOK_NONE; release it
 * with lexer_free().  Returns NULL on allocation failure or on an unclosed
 * string literal; everything allocated so far has been freed in that case.
 */
Token *lexer(const char *s)
{
    size_t size = TOKS_CHUNK;
    size_t tok_i = 0;
    size_t column = 1;
    size_t line = 1;
    /* One extra zeroed slot serves as the TOK_NONE terminator. */
    Token *toks = calloc(size + 1, sizeof(Token));

    if (toks == NULL) {
        perror("lexer");
        return NULL;
    }

    while (*s != '\0') {
        /*
         * Skip whitespace.  The casts to unsigned char are required: passing
         * a negative char to a <ctype.h> function is undefined behaviour.
         */
        while (isspace((unsigned char)*s)) {
            if (*s == '\n') {
                column = 1;
                line += 1;
            } else {
                column += 1;
            }
            s += 1;
        }
        if (*s == '\0') {
            break;
        }

        toks[tok_i].column = column;
        toks[tok_i].line = line;

        const char *const rem_s = s;
        const unsigned int two_wide = two_wide_tok(s);
        const unsigned int one_wide = one_wide_tok(s);

        if (*s == '"') {
            /* String literal: everything up to the next double quote. */
            const char *end = strchr(s + 1, '"');

            if (end == NULL) {
                printf("unclosed string\n");
                lexer_free(toks);
                return NULL;
            }
            toks[tok_i].s = copy_span(s + 1, (size_t)(end - s - 1));
            if (toks[tok_i].s == NULL) {
                perror("lexer");
                lexer_free(toks);
                return NULL;
            }
            toks[tok_i].type = TOK_STRING;
            tok_i += 1;
            s = end + 1;
        } else if (isalpha((unsigned char)*s) || *s == '_') {
            /* Identifier or keyword. */
            size_t len = 0;

            while (isalnum((unsigned char)s[len]) || s[len] == '_') {
                len += 1;
            }
            toks[tok_i].s = copy_span(s, len);
            if (toks[tok_i].s == NULL) {
                perror("lexer");
                lexer_free(toks);
                return NULL;
            }
            toks[tok_i].type = word_type(toks[tok_i].s);
            tok_i += 1;
            s += len;
        } else if (isdigit((unsigned char)*s)) {
            /* Decimal integer literal. */
            size_t len = 0;

            while (isdigit((unsigned char)s[len])) {
                len += 1;
            }
            /* strtol() saturates on overflow, where atoi() is undefined. */
            toks[tok_i].v.i = (int)strtol(s, NULL, 10);
            toks[tok_i].type = TOK_INTEGER;
            tok_i += 1;
            s += len;
        } else if (two_wide == TOK_COMMENT) {
            /* Line comment: drop it, leaving the '\n' for the skipper. */
            while (*s != '\0' && *s != '\n') {
                s += 1;
            }
        } else if (two_wide != TOK_NONE) {
            toks[tok_i].type = two_wide;
            toks[tok_i].v.c = *s;
            tok_i += 1;
            s += 2;
        } else if (one_wide != TOK_NONE) {
            toks[tok_i].type = one_wide;
            toks[tok_i].v.c = *s;
            tok_i += 1;
            s += 1;
        } else {
            printf("wtf is this shit? %c\n", *s);
            s += 1;
        }

        if (tok_i == size) {
            toks = resize_toks(toks, &size);
            if (toks == NULL) {
                return NULL;
            }
        }

        /*
         * Advance the position over what was just consumed.  Only string
         * literals can contain a newline here; walking the span keeps line
         * and column right for tokens that follow a multi-line string.
         */
        for (const char *p = rem_s; p < s; p++) {
            if (*p == '\n') {
                column = 1;
                line += 1;
            } else {
                column += 1;
            }
        }
    }

    return toks;
}

/*
 * Release a token array returned by lexer(): token_free() is called on every
 * token up to (not including) the TOK_NONE terminator, then the array itself
 * is freed.  Passing NULL is a no-op.
 */
void lexer_free(Token *toks)
{
    if (toks == NULL) {
        return;
    }
    for (Token *tok = toks; tok->type != TOK_NONE; tok += 1) {
        token_free(tok);
    }
    free(toks);
}

/*
 * Call token_print() on every token up to the TOK_NONE terminator.
 * Passing NULL is a no-op, matching lexer_free().
 */
void lexer_print(const Token *toks)
{
    if (toks == NULL) {
        return;
    }
    for (const Token *tok = toks; tok->type != TOK_NONE; tok += 1) {
        token_print(tok);
    }
}