#include "lexer.h"
#include "slice.h"
#include "token.h"
// NOTE(review): the names of the three system headers were missing from the
// text under review. <ctype.h> and <stdio.h> are required by the calls below;
// the third one is a guess -- confirm against the original file.
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Tell whether `c` can be part of a word: a letter or an underscore.
 * Digits are not word characters.
 *
 * `c` is forwarded to isalpha(), so it must be representable as an
 * unsigned char (or be EOF); callers convert their `char` first.
 *
 * Returns 1 for a word character, 0 otherwise.
 */
static int
iswordy(int c)
{
    return isalpha(c) || c == '_';
}

/*
 * Map a single punctuation character to its token type.
 *
 * Returns TOK_NONE when `c` is not one of the single-character tokens.
 */
static unsigned int
simple_token(char c)
{
    switch (c) {
    case '{': return TOK_OPEN_CURL;
    case '}': return TOK_CLOS_CURL;
    case '(': return TOK_OPEN_PAREN;
    case ')': return TOK_CLOS_PAREN;
    case '[': return TOK_OPEN_SQUAR;
    case ']': return TOK_CLOS_SQUAR;
    case '=': return TOK_ASSIGN;
    case ';': return TOK_END;
    default:  return TOK_NONE;
    }
}

/*
 * Return the leading part of `slice` whose characters all satisfy `isfun`.
 *
 * Scanning starts at slice.begin and stops at the first character rejected
 * by `isfun`; the result is slice_sub(slice, slice.begin, <that index>).
 * When every character is accepted, `slice` itself is returned.
 *
 * `isfun` is typically a <ctype.h> predicate, so each character is converted
 * to unsigned char before the call: passing a negative `char` to those
 * functions is undefined behaviour.
 */
static Slice
slice_match(Slice slice, int (*isfun)(int c))
{
    for (int i = slice.begin; i < slice.end; i++) {
        if (!isfun((unsigned char)slice.str[i])) {
            return slice_sub(slice, slice.begin, i);
        }
    }
    return slice;
}

/*
 * Return the keyword token type matching `word`, or TOK_WORD when `word` is
 * not a keyword.
 */
static unsigned int
keyword_type(Slice word)
{
    if (slice_equal(word, slice_from_str("while"))) {
        return TOK_KW_WHILE;
    }
    if (slice_equal(word, slice_from_str("if"))) {
        return TOK_KW_IF;
    }
    if (slice_equal(word, slice_from_str("else"))) {
        return TOK_KW_ELSE;
    }
    if (slice_equal(word, slice_from_str("let"))) {
        return TOK_KW_LET;
    }
    if (slice_equal(word, slice_from_str("return"))) {
        return TOK_KW_RETURN;
    }
    return TOK_WORD;
}

/*
 * Split `slice` into tokens.
 *
 * Recognised input:
 *   - whitespace and `//` line comments are skipped;
 *   - single-character tokens (see simple_token()); a '}' is additionally
 *     followed by a TOK_END token;
 *   - runs of decimal digits (TOK_NUMBER);
 *   - runs of letters/underscores (TOK_WORD, or a TOK_KW_* keyword);
 *   - "..." strings (TOK_STRING, quotes excluded, no escape sequences);
 *   - 'x' single characters (TOK_CHAR, quotes excluded).
 * Any other character is reported on stdout and skipped.
 *
 * Every look-ahead is bounded by slice.end so the lexer never reads outside
 * the slice it was given.
 *
 * Returns the token list built with token_append(); the pointer is still
 * NULL when no token was produced. On an unclosed string or character
 * literal, a message is written to stderr, the tokens collected so far are
 * passed to token_destroy() and NULL is returned.
 */
Token *
lexer(Slice slice)
{
    Token *toks = NULL;
    int i = slice.begin;

    while (i < slice.end) {
        const char c = slice.str[i];

        // Skip whitespaces.
        if (isspace((unsigned char)c)) {
            i += 1;
            continue;
        }

        // Ignore comments: drop everything up to (not including) the newline.
        if (c == '/' && i + 1 < slice.end && slice.str[i + 1] == '/') {
            while (i < slice.end && slice.str[i] != '\n') {
                i += 1;
            }
            continue;
        }

        // Single character tokens.
        const unsigned int simple = simple_token(c);
        if (simple != TOK_NONE) {
            Slice sub = slice_sub(slice, i, i + 1);
            token_append(&toks, token_create(sub, simple));
            // A closing curly also terminates the current statement.
            if (c == '}') {
                token_append(&toks, token_create(sub, TOK_END));
            }
            i += 1;
            continue;
        }

        // Number token.
        if (isdigit((unsigned char)c)) {
            Slice number = slice_match(slice_sub(slice, i, slice.end), isdigit);
            token_append(&toks, token_create(number, TOK_NUMBER));
            i = number.end;
            continue;
        }

        // Word token, and keywords by extension.
        if (iswordy((unsigned char)c)) {
            Slice word = slice_match(slice_sub(slice, i, slice.end), iswordy);
            token_append(&toks, token_create(word, keyword_type(word)));
            i = word.end;
            continue;
        }

        // String token.
        if (c == '"') {
            int close = i + 1;
            while (close < slice.end && slice.str[close] != '"') {
                close += 1;
            }
            // Reaching the end of the slice means no closing quote was found.
            if (close >= slice.end) {
                // XXX: Streamline this kind of error handling.
                fprintf(stderr, "unclosed string\n");
                token_destroy(toks);
                return NULL;
            }
            Slice string = slice_sub(slice, i + 1, close);
            token_append(&toks, token_create(string, TOK_STRING));
            i = close + 1;
            continue;
        }

        // Character token: exactly one character between single quotes.
        if (c == '\'') {
            if (i + 2 >= slice.end
                || slice.str[i + 1] == '\0'
                || slice.str[i + 2] != '\'') {
                fprintf(stderr, "unclosed character\n");
                token_destroy(toks);
                return NULL;
            }
            Slice character = slice_sub(slice, i + 1, i + 2);
            token_append(&toks, token_create(character, TOK_CHAR));
            i += 3;
            continue;
        }

        printf("skipping '%c'\n", slice.str[i]);
        i += 1;
    }

    return toks;
}