summary refs log tree commit diff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 142
1 files changed, 142 insertions, 0 deletions
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..a68b93e
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,142 @@
+#include "lexer.h"
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Grow the token array by 128 slots.
+ *
+ * toks: current array; realloc may move it.
+ * size: in/out capacity, not counting the one extra terminator slot that is
+ *       always allocated on top of it.  Only updated when the reallocation
+ *       succeeds.
+ *
+ * Returns the (possibly relocated) array.  On allocation failure an error is
+ * reported with perror(), the old array is released with lexer_free() and
+ * NULL is returned.
+ */
+static Token *resize_toks(Token *toks, size_t *size)
+{
+    const size_t old_size = *size;
+    const size_t new_size = old_size + 128;
+    Token *const new_toks = realloc(toks, sizeof(Token) * (new_size + 1));
+    if (new_toks == NULL) {
+        perror("resize_toks");
+        lexer_free(toks);
+        return NULL;
+    }
+    /*
+     * realloc() leaves the added memory indeterminate.  The previous
+     * allocation held old_size + 1 slots, so zero everything after it:
+     * lexer_free() and lexer_print() walk the array until they meet a
+     * TOK_NONE slot.  This assumes, as the calloc() in lexer() already
+     * does, that an all-zero Token has type TOK_NONE.
+     */
+    memset(new_toks + old_size + 1, 0, sizeof(Token) * (new_size - old_size));
+    *size = new_size;
+    return new_toks;
+}
+
+/*
+ * Classify the character at *s as a single-character token.
+ *
+ * Returns the matching TOK_* value, or TOK_NONE when the character is not
+ * one of the punctuators listed below.
+ */
+static unsigned int one_wide_tok(const char *s)
+{
+    unsigned int type = TOK_NONE;
+
+    switch (*s) {
+    case '(':
+        type = TOK_PAREN_OPEN;
+        break;
+    case ')':
+        type = TOK_PAREN_CLOS;
+        break;
+    case '{':
+        type = TOK_CURL_OPEN;
+        break;
+    case '}':
+        type = TOK_CURL_CLOS;
+        break;
+    case '[':
+        type = TOK_SQUAR_OPEN;
+        break;
+    case ']':
+        type = TOK_SQUAR_CLOS;
+        break;
+    case ';':
+        type = TOK_SEMICOLON;
+        break;
+    case '=':
+        type = TOK_ASSIGN;
+        break;
+    case ',':
+        type = TOK_COMMA;
+        break;
+    case '<':
+        type = TOK_COMP_LESS;
+        break;
+    case '%':
+        type = TOK_MODULO;
+        break;
+    default:
+        break;
+    }
+    return type;
+}
+
+/* Pack two characters into one int so a character pair can be compared at once. */
+#define PAIR(a, b) ((int)(a) + (int)(b) * 256)
+
+/*
+ * Classify the two characters at s[0] and s[1] as a two-character token.
+ *
+ * Returns TOK_INCREMENT for "++" and TOK_NONE for anything else.  Both
+ * characters are read, so s[0] must not be the terminating NUL.
+ */
+static unsigned int two_wide_tok(const char *s)
+{
+    const int key = PAIR(s[0], s[1]);
+
+    if (key == PAIR('+', '+'))
+        return TOK_INCREMENT;
+    return TOK_NONE;
+}
+
+/*
+ * Copy the len bytes starting at src into a newly allocated, NUL-terminated
+ * buffer.  Returns NULL when the allocation fails.
+ */
+static char *copy_span(const char *src, size_t len)
+{
+    char *const dst = calloc(1, len + 1);
+    if (dst != NULL)
+        memcpy(dst, src, len);
+    return dst;
+}
+
+/*
+ * Split the NUL-terminated text s into an array of tokens.
+ *
+ * Recognised input:
+ *   - "..." string literals        -> TOK_STRING, text (without quotes) in v.s
+ *   - [A-Za-z_][A-Za-z0-9_]* words -> TOK_KEYWORD, text in v.s
+ *   - decimal digit runs           -> TOK_INTEGER, value in v.i
+ *   - "++" and the single-character punctuators known to one_wide_tok(),
+ *     with the first character stored in v.c
+ * Any other character is reported on stdout and skipped.  Each token records
+ * the line it starts on.
+ *
+ * Returns an array terminated by a slot whose type is TOK_NONE; release it
+ * with lexer_free().  Returns NULL after an allocation failure or an
+ * unterminated string literal; everything allocated so far has been freed.
+ */
+Token *lexer(const char *s)
+{
+    size_t size = 128;
+    size_t tok_i = 0;
+    size_t line = 1;
+    /* One slot more than size so there is always room for the terminator. */
+    Token *toks = calloc(size + 1, sizeof(Token));
+    if (toks == NULL) {
+        perror("lexer");
+        return NULL;
+    }
+    while (*s != '\0') {
+        /*
+         * Skip whitespace.  The <ctype.h> functions require a value
+         * representable as unsigned char, so cast before every call: a
+         * plain char may be negative for bytes >= 0x80.
+         */
+        while (isspace((unsigned char)*s)) {
+            if (*s == '\n')
+                line += 1;
+            s += 1;
+        }
+        if (*s == '\0')
+            break;
+        toks[tok_i].line = line;
+        if (*s == '"') {
+            const char *end = strchr(s + 1, '"');
+            if (end == NULL) {
+                printf("unclosed string\n");
+                goto fail;
+            }
+            const size_t len = (size_t)(end - s - 1);
+            toks[tok_i].v.s = copy_span(s + 1, len);
+            if (toks[tok_i].v.s == NULL) {
+                perror("lexer");
+                goto fail;
+            }
+            toks[tok_i].type = TOK_STRING;
+            /* Newlines inside the literal still advance the line counter. */
+            for (const char *p = s + 1; p != end; p += 1) {
+                if (*p == '\n')
+                    line += 1;
+            }
+            tok_i += 1;
+            s = end + 1;
+        } else if (isalpha((unsigned char)*s) || *s == '_') {
+            size_t len = 0;
+            while (isalnum((unsigned char)s[len]) || s[len] == '_')
+                len += 1;
+            toks[tok_i].v.s = copy_span(s, len);
+            if (toks[tok_i].v.s == NULL) {
+                perror("lexer");
+                goto fail;
+            }
+            toks[tok_i].type = TOK_KEYWORD;
+            tok_i += 1;
+            s += len;
+        } else if (isdigit((unsigned char)*s)) {
+            size_t len = 0;
+            while (isdigit((unsigned char)s[len]))
+                len += 1;
+            /*
+             * strtol() instead of atoi(): atoi() has undefined behaviour
+             * when the value does not fit, strtol() saturates instead.
+             * Out-of-range literals are still not diagnosed here.
+             */
+            toks[tok_i].v.i = (int)strtol(s, NULL, 10);
+            toks[tok_i].type = TOK_INTEGER;
+            tok_i += 1;
+            s += len;
+        } else if (two_wide_tok(s) != TOK_NONE) {
+            toks[tok_i].type = two_wide_tok(s);
+            toks[tok_i].v.c = *s;
+            tok_i += 1;
+            s += 2;
+        } else if (one_wide_tok(s) != TOK_NONE) {
+            toks[tok_i].type = one_wide_tok(s);
+            toks[tok_i].v.c = *s;
+            tok_i += 1;
+            s += 1;
+        } else {
+            printf("wtf is this shit? %c\n", *s);
+            s += 1;
+        }
+        if (tok_i == size) {
+            toks = resize_toks(toks, &size);
+            /* resize_toks() has already released the array on failure. */
+            if (toks == NULL)
+                return NULL;
+            /*
+             * The array was grown with realloc(), which does not clear the
+             * memory it adds.  Make sure every unused slot from tok_i on is
+             * zeroed so the terminator that lexer_free() and lexer_print()
+             * look for is in place (assumes an all-zero Token has type
+             * TOK_NONE, as the initial calloc() does).
+             */
+            memset(&toks[tok_i], 0, sizeof(Token) * (size + 1 - tok_i));
+        }
+    }
+    return toks;
+
+fail:
+    lexer_free(toks);
+    return NULL;
+}
+
+/*
+ * Release a token array: token_free() is called on every token up to (not
+ * including) the TOK_NONE terminator, then the array itself is freed.
+ * Passing NULL is allowed and does nothing.
+ */
+void lexer_free(Token *toks)
+{
+    if (toks == NULL)
+        return;
+
+    Token *cur = toks;
+    while (cur->type != TOK_NONE) {
+        token_free(cur);
+        cur += 1;
+    }
+    free(toks);
+}
+
+/*
+ * Print every token in the array with token_print(), stopping at the
+ * TOK_NONE terminator.
+ *
+ * Passing NULL does nothing, matching lexer_free(); lexer() returns NULL on
+ * failure, so its result can be handed over without a separate check.
+ */
+void lexer_print(const Token *toks)
+{
+    if (toks == NULL)
+        return;
+    for (const Token *tok = toks; tok->type != TOK_NONE; tok += 1)
+        token_print(tok);
+}