summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkdx <kikoodx@paranoici.org>2023-01-19 04:16:55 +0100
committerkdx <kikoodx@paranoici.org>2023-01-19 04:18:04 +0100
commit8150144a228692272fd181d2ebc1041b45fde032 (patch)
tree0226a0600864c9ed8515a5d84aece35852e80c4b
parentb600369367193c867013d6ac56aa3e750b66f6be (diff)
downloadgolem-8150144a228692272fd181d2ebc1041b45fde032.tar.gz
fully lex test
-rw-r--r--Makefile2
-rw-r--r--Token.c37
-rw-r--r--Token.h40
-rw-r--r--lexer.c142
-rw-r--r--lexer.h6
-rw-r--r--main.c9
-rw-r--r--test5
7 files changed, 239 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index c4f2d14..3ce0d8c 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ LD := $(CC)
SRC := $(wildcard *.c)
OBJ := $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(SRC)))
NAME := restruct
-CFLAGS := -Wall -Wextra -std=c99 -pedantic
+CFLAGS := -g -O0 -Wall -Wextra -std=c99 -pedantic
LDFLAGS :=
all: $(NAME)
diff --git a/Token.c b/Token.c
new file mode 100644
index 0000000..0f259a1
--- /dev/null
+++ b/Token.c
@@ -0,0 +1,37 @@
+#include "Token.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Release the heap data owned by a token's payload.
+ *
+ * TOK_KEYWORD and TOK_STRING are the kinds whose v.s points at an
+ * allocated string; every other kind stores its value inline, so there is
+ * nothing to release.  The Token object itself is not freed.
+ * Passing NULL is a no-op.
+ */
+void token_free(Token *tok)
+{
+    /* Check the pointer before the first dereference. */
+    if (tok == NULL)
+        return;
+    switch (tok->type) {
+    case TOK_KEYWORD:
+    case TOK_STRING:
+        free(tok->v.s);
+        /* Guard against a second token_free() on the same token. */
+        tok->v.s = NULL;
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * Print a one-line description of a token to stdout: its line number,
+ * the name of its kind, and the payload for the kinds that carry one.
+ * Kinds without a dedicated case are reported with their numeric type.
+ * tok must not be NULL.
+ */
+void token_print(const Token *tok)
+{
+    /* line and type are unsigned int, hence %u rather than %d. */
+    printf("%u ", tok->line);
+    switch (tok->type) {
+    case TOK_KEYWORD: printf("TOK_KEYWORD %s\n", tok->v.s); break;
+    case TOK_STRING: printf("TOK_STRING \"%s\"\n", tok->v.s); break;
+    case TOK_INTEGER: printf("TOK_INTEGER %d\n", tok->v.i); break;
+    case TOK_PAREN_OPEN: printf("TOK_PAREN_OPEN\n"); break;
+    case TOK_PAREN_CLOS: printf("TOK_PAREN_CLOS\n"); break;
+    case TOK_CURL_OPEN: printf("TOK_CURL_OPEN\n"); break;
+    case TOK_CURL_CLOS: printf("TOK_CURL_CLOS\n"); break;
+    case TOK_SQUAR_OPEN: printf("TOK_SQUAR_OPEN\n"); break;
+    case TOK_SQUAR_CLOS: printf("TOK_SQUAR_CLOS\n"); break;
+    case TOK_SEMICOLON: printf("TOK_SEMICOLON\n"); break;
+    case TOK_ASSIGN: printf("TOK_ASSIGN\n"); break;
+    case TOK_COMMA: printf("TOK_COMMA\n"); break;
+    case TOK_COMP_LESS: printf("TOK_COMP_LESS\n"); break;
+    case TOK_MODULO: printf("TOK_MODULO\n"); break;
+    case TOK_INCREMENT: printf("TOK_INCREMENT\n"); break;
+    default: printf("token_print TODO: type %u\n", tok->type); break;
+    }
+}
diff --git a/Token.h b/Token.h
new file mode 100644
index 0000000..f11753f
--- /dev/null
+++ b/Token.h
@@ -0,0 +1,40 @@
+#pragma once
+#include <stdbool.h>
+
+enum { /* Token kinds stored in Token.type. */
+ TOK_NONE, /* value 0: no token; lexer_free() and lexer_print() stop at the first entry of this kind */
+ TOK_KEYWORD, /* word made of letters, digits and '_' (heap string in v.s) */
+ TOK_STRING, /* double-quoted text with the quotes removed (heap string in v.s) */
+ TOK_INTEGER, /* run of decimal digits (value in v.i) */
+ /* single char toks */
+ TOK_PAREN_OPEN, /* '(' */
+ TOK_PAREN_CLOS, /* ')' */
+ TOK_CURL_OPEN, /* '{' */
+ TOK_CURL_CLOS, /* '}' */
+ TOK_SQUAR_OPEN, /* '[' */
+ TOK_SQUAR_CLOS, /* ']' */
+ TOK_SEMICOLON, /* ';' */
+ TOK_ASSIGN, /* '=' */
+ TOK_COMMA, /* ',' */
+ TOK_COMP_LESS, /* '<' */
+ TOK_MODULO, /* '%' */
+ /* double char toks */
+ TOK_INCREMENT, /* "++" */
+};
+
+union TokenValue { /* Payload of a Token; the meaningful member depends on Token.type. */
+ char *s; /* heap-allocated text of a TOK_KEYWORD or TOK_STRING */
+ char c; /* first character of a one- or two-character punctuation token */
+ int i; /* value of a TOK_INTEGER */
+ double d; /* not written by the lexer code in this change */
+ bool b; /* not written by the lexer code in this change */
+};
+
+typedef struct Token { /* One lexed token. */
+ unsigned int type; /* one of the TOK_* constants */
+ unsigned int line; /* line on which the token starts; the lexer counts from 1 */
+ union TokenValue v; /* payload, interpreted according to type */
+} Token;
+
+void token_free(Token *tok); /* releases heap data owned by the token's payload; does not free tok itself */
+void token_print(const Token *tok); /* writes one line describing tok (line number, kind, payload) to stdout */
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..a68b93e
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,142 @@
+#include "lexer.h"
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Grow a token array by 128 usable slots (plus the terminator slot).
+ *
+ * toks: array currently holding *size usable slots and one extra slot.
+ * size: in/out usable slot count; updated only when the call succeeds.
+ *
+ * Returns the (possibly moved) array, or NULL on allocation failure, in
+ * which case the old array has already been released with lexer_free().
+ */
+static Token *resize_toks(Token *toks, size_t *size)
+{
+    const size_t old_size = *size;
+    const size_t new_size = old_size + 128;
+    Token *const new_toks = realloc(toks, sizeof(Token) * (new_size + 1));
+    if (new_toks == NULL) {
+        perror("resize_toks");
+        lexer_free(toks);
+        return NULL;
+    }
+    /*
+     * realloc() leaves the added slots indeterminate.  lexer_free() and
+     * lexer_print() walk the array until they meet a TOK_NONE (zero) type,
+     * so the new tail must be zeroed just like the initial calloc() did.
+     */
+    memset(new_toks + old_size + 1, 0, sizeof(Token) * (new_size - old_size));
+    *size = new_size;
+    return new_toks;
+}
+
+/*
+ * Map the character at *s to its single-character token kind.
+ * Returns TOK_NONE when the character is not one of the known
+ * punctuation characters.
+ */
+static unsigned int one_wide_tok(const char *s)
+{
+    static const struct {
+        char ch;
+        unsigned int type;
+    } table[] = {
+        { '(', TOK_PAREN_OPEN },
+        { ')', TOK_PAREN_CLOS },
+        { '{', TOK_CURL_OPEN },
+        { '}', TOK_CURL_CLOS },
+        { '[', TOK_SQUAR_OPEN },
+        { ']', TOK_SQUAR_CLOS },
+        { ';', TOK_SEMICOLON },
+        { '=', TOK_ASSIGN },
+        { ',', TOK_COMMA },
+        { '<', TOK_COMP_LESS },
+        { '%', TOK_MODULO },
+    };
+    const char wanted = *s;
+    for (size_t k = 0; k < sizeof table / sizeof table[0]; k += 1) {
+        if (table[k].ch == wanted)
+            return table[k].type;
+    }
+    return TOK_NONE;
+}
+
+/* Pack two characters into one int so a pair can be compared in one step. */
+#define PAIR(a, b) ((int)(a) + (int)(b) * 256)
+/*
+ * Map the two characters s[0] and s[1] to a two-character token kind.
+ * Returns TOK_NONE when the pair is not a known token.  Both characters
+ * are always read, so s[0] must not be the string terminator.
+ */
+static unsigned int two_wide_tok(const char *s)
+{
+    const int pair = PAIR(s[0], s[1]);
+    if (pair == PAIR('+', '+'))
+        return TOK_INCREMENT;
+    return TOK_NONE;
+}
+
+/* Copy len bytes starting at src into a fresh NUL-terminated heap string. */
+static char *copy_span(const char *src, size_t len)
+{
+    char *const copy = malloc(len + 1);
+    if (copy != NULL) {
+        memcpy(copy, src, len);
+        copy[len] = '\0';
+    }
+    return copy;
+}
+
+/*
+ * Split the NUL-terminated text s into tokens.
+ *
+ * Returns a heap array of tokens ending with an entry whose type is
+ * TOK_NONE; release it with lexer_free().  Returns NULL after reporting
+ * the problem when memory runs out, a string literal is not closed, or an
+ * integer literal does not fit in an int.  Characters that start no known
+ * token are reported and skipped.
+ */
+Token *lexer(const char *s)
+{
+    size_t size = 128;
+    size_t tok_i = 0;
+    unsigned int line = 1;
+    Token *toks = calloc(size + 1, sizeof(Token));
+    if (toks == NULL) {
+        perror("lexer");
+        return NULL;
+    }
+    for (;;) {
+        /*
+         * Keep the array terminated at every step: the slot about to be
+         * filled is the TOK_NONE sentinel until its type is assigned, so
+         * any lexer_free() on an error path stops in the right place.
+         */
+        toks[tok_i].type = TOK_NONE;
+        /* Skip whitespaces.  <ctype.h> functions need unsigned char values. */
+        while (isspace((unsigned char)*s)) {
+            if (*s == '\n')
+                line += 1;
+            s += 1;
+        }
+        if (*s == '\0')
+            break;
+        toks[tok_i].line = line;
+        if (*s == '"') {
+            const char *const end = strchr(s + 1, '"');
+            if (end == NULL) {
+                printf("unclosed string\n");
+                lexer_free(toks);
+                return NULL;
+            }
+            const size_t len = (size_t)(end - s - 1);
+            toks[tok_i].v.s = copy_span(s + 1, len);
+            if (toks[tok_i].v.s == NULL) {
+                perror("lexer");
+                lexer_free(toks);
+                return NULL;
+            }
+            toks[tok_i].type = TOK_STRING;
+            /* Newlines inside the literal still advance the line count. */
+            for (const char *p = s + 1; p < end; p += 1) {
+                if (*p == '\n')
+                    line += 1;
+            }
+            tok_i += 1;
+            s = end + 1;
+        } else if (isalpha((unsigned char)*s) || *s == '_') {
+            size_t len = 0;
+            while (isalnum((unsigned char)s[len]) || s[len] == '_')
+                len += 1;
+            toks[tok_i].v.s = copy_span(s, len);
+            if (toks[tok_i].v.s == NULL) {
+                perror("lexer");
+                lexer_free(toks);
+                return NULL;
+            }
+            toks[tok_i].type = TOK_KEYWORD;
+            tok_i += 1;
+            s += len;
+        } else if (isdigit((unsigned char)*s)) {
+            int value = 0;
+            while (isdigit((unsigned char)*s)) {
+                const int digit = *s - '0';
+                /* Reject the literal before value * 10 + digit overflows. */
+                if (value > (INT_MAX - digit) / 10) {
+                    printf("integer literal out of range\n");
+                    lexer_free(toks);
+                    return NULL;
+                }
+                value = value * 10 + digit;
+                s += 1;
+            }
+            toks[tok_i].v.i = value;
+            toks[tok_i].type = TOK_INTEGER;
+            tok_i += 1;
+        } else {
+            /* Longest match first: try two characters, then one. */
+            size_t width = 2;
+            unsigned int type = two_wide_tok(s);
+            if (type == TOK_NONE) {
+                width = 1;
+                type = one_wide_tok(s);
+            }
+            if (type != TOK_NONE) {
+                toks[tok_i].v.c = *s;
+                toks[tok_i].type = type;
+                tok_i += 1;
+                s += width;
+            } else {
+                printf("wtf is this shit? %c\n", *s);
+                s += 1;
+            }
+        }
+        if (tok_i == size) {
+            /* The spare slot is the terminator while the array is grown. */
+            toks[tok_i].type = TOK_NONE;
+            toks = resize_toks(toks, &size);
+            if (toks == NULL)
+                return NULL;
+        }
+    }
+    return toks;
+}
+
+/*
+ * Release a token array: every token up to the TOK_NONE terminator is
+ * handed to token_free(), then the array itself is freed.
+ * Passing NULL is a no-op.
+ */
+void lexer_free(Token *toks)
+{
+    if (toks == NULL)
+        return;
+    Token *cur = toks;
+    while (cur->type != TOK_NONE) {
+        token_free(cur);
+        cur += 1;
+    }
+    free(toks);
+}
+
+/*
+ * Print every token of the array, in order, with token_print(), stopping
+ * at the TOK_NONE terminator.  Passing NULL prints nothing, matching the
+ * NULL tolerance of lexer_free().
+ */
+void lexer_print(const Token *toks)
+{
+    if (toks == NULL)
+        return;
+    for (const Token *tok = toks; tok->type != TOK_NONE; tok += 1)
+        token_print(tok);
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..150a7ad
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,6 @@
+#pragma once
+#include "Token.h"
+
+Token *lexer(const char *s); /* tokenizes the NUL-terminated text s; returns a heap array that consumers walk up to a TOK_NONE entry, or NULL on failure */
+void lexer_free(Token *toks); /* frees an array returned by lexer() together with each token's payload; NULL is accepted */
+void lexer_print(const Token *toks); /* prints each token up to the TOK_NONE entry via token_print() */
diff --git a/main.c b/main.c
index 2f334e9..a1f9439 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,5 @@
#include "drain.h"
+#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
@@ -19,7 +20,13 @@ int main(int argc, char **argv)
fprintf(stderr, "failed to drain '%s'\n", argv[1]);
return 1;
}
- printf("%s", data);
+ Token *toks = lexer(data);
free(data);
+ if (toks == NULL) {
+ fprintf(stderr, "lexer failed\n");
+ return 1;
+ }
+ lexer_print(toks);
+ lexer_free(toks);
return 0;
}
diff --git a/test b/test
new file mode 100644
index 0000000..f49dbe5
--- /dev/null
+++ b/test
@@ -0,0 +1,5 @@
+int main(void) {
+ for (int i = 0; i < 10; i++) {
+ printf("%d\n", i);
+ }
+}