#include #include #include #include #include #include #include #define ANSI_RED "\x1b[31m" #define ANSI_GREEN "\x1b[32m" #define ANSI_YELLOW "\x1b[33m" #define ANSI_BLUE "\x1b[34m" #define ANSI_MAGENTA "\x1b[35m" #define ANSI_CYAN "\x1b[36m" #define ANSI_RESET "\x1b[0m" static char *source = NULL; static int get_line_number(const char *p) { if (p == NULL) return 0; int line = 1; for (char *s = source; *s != '\0'; s++) { if (s == p) return line; line += (*s == '\n'); } return line; } static char * get_error_line(int target) { int line = 1; char *s = source; while (line < target) { line += (*s == '\n'); s += 1; } return s; } typedef enum { TOK_PUNCT, TOK_NUM, TOK_WORD, TOK_STRING, TOK_EOF, } TokenType; static const char * str_tok[] = { [TOK_PUNCT] = "TOK_PUNCT", [TOK_NUM] = "TOK_NUM", [TOK_WORD] = "TOK_WORD", [TOK_STRING] = "TOK_STRING", [TOK_EOF] = "TOK_EOF", }; typedef struct Token Token; struct Token { TokenType type; Token *next; int val; char *loc; int len; }; static char *phase = "preparation"; static void verror(const char *fmt, va_list va) { fprintf(stderr, "%s: " ANSI_RED "error: " ANSI_RESET, phase); vfprintf(stderr, fmt, va); fprintf(stderr, "\n"); } static void error(const char *fmt, ...) { va_list va; va_start(va, fmt); verror(fmt, va); va_end(va); exit(1); } static void error_loc(const char *loc, int len, const char *fmt, ...) { va_list va; va_start(va, fmt); verror(fmt, va); va_end(va); const int line_no = get_line_number(loc); if (line_no == 0) exit(1); const char *line = get_error_line(line_no); const char *line_end = strchr(line, '\n'); if (line_end == NULL) fprintf(stderr, "%5d | %s\n", line_no, line); else fprintf(stderr, "%5d | %.*s\n", line_no, (int)(line_end - line), line); int spaces = 8; for (int i = 0; i < loc - line; i++) spaces += 1 + 7 * (line[i] == '\t'); fprintf(stderr, "%*s", spaces, ""); fprintf(stderr, ANSI_RED "^"); for (int i = 1; i < len; i++) fprintf(stderr, "~"); fprintf(stderr, ANSI_RESET "\n"); exit(1); } static void expect(const Token *tok, TokenType type) { if (tok->type != type) error_loc(tok->loc, tok->len, "expected a %s, got %s '%.*s'", str_tok[type], str_tok[tok->type], tok->len, tok->loc); } static bool equal(const Token *tok, const char *op) { if (tok == NULL) return false; return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0'; } static Token * skip(Token *tok, char *s) { if (!equal(tok, s)) error_loc(tok->loc, tok->len, "expected '%s'; got '%.*s'", s, tok->len, tok->loc); return tok->next; } static int get_number(Token *tok) { expect(tok, TOK_NUM); return tok->val; } static Token * new_token(TokenType type, char *start, char *end) { Token *tok = calloc(1, sizeof(Token)); if (tok == NULL) error("calloc failed", 0); tok->type = type; tok->loc = start; tok->len = end - start; return tok; } static int is_punct(const char *p) { if (strchr("=!<>", p[0]) != NULL && p[1] == '=') return 2; if (strchr("-+", p[0]) != NULL && p[0] == p[1]) return 2; if (strchr("|=", p[0]) != NULL && p[1] == '>') return 2; return (strchr("+-*/\\%()<>,;{}[]=&^|", p[0]) != NULL); } static unsigned char escaped_char(char *s, char **skipped) { if (*s == '\\') { int rv = -1; switch (s[1]) { case 'a': rv = '\a'; break; case 'b': rv = '\b'; break; case 'f': rv = '\f'; break; case 'n': rv = '\n'; break; case 'r': rv = '\r'; break; case 't': rv = '\t'; break; case 'v': rv = '\v'; break; case '\\':rv = '\\'; break; case '\'':rv = '\''; break; case '"': rv = '"'; break; default: break; } if (rv >= 0) { *skipped = s + 2; return rv; } } *skipped = s + 1; return *s; } static Token * tokenize(char *p) { Token head = {0}; Token *cur = &head; while (*p != '\0') { /* comment */ if (strncmp(p, "//", 2) == 0 || strncmp(p, "🗿", strlen("🗿")) == 0) { while (*p != '\0' && *p != '\n') p++; continue; } if (isspace(*p)) { p++; continue; } if (isdigit(*p)) { char *q = p; const unsigned long val = strtoul(p, &p, 0); cur = cur->next = new_token(TOK_NUM, q, p); cur->val = val; continue; } if (*p == '"') { char *end = strchr(p + 1, '"'); if (end == NULL) error_loc(p, 1, "unclosed double quotes"); cur = cur->next = new_token(TOK_STRING, p, end + 1); p = end + 1; continue; } if (*p == '`') { char *q = p; p += 1; while (isalnum(*p) || *p == '_') p += 1; cur = cur->next = new_token(TOK_STRING, q, p + 1); continue; } if (*p == '\'' && p[1] != '\'') { char *q = p; const int c = escaped_char(p + 1, &p); if (*p != '\'') error_loc(p, 1, "unclosed single quotes"); p += 1; cur = cur->next = new_token(TOK_NUM, q, p); cur->val = (unsigned char)c; continue; } const int punct = is_punct(p); if (punct) { cur = cur->next = new_token(TOK_PUNCT, p, p + punct); p += punct; continue; } if (isalpha(*p) || *p == '_' || *p == '.') { char *q = p; while (isalnum(*p) || *p == '_' || *p == '.') p += 1; cur = cur->next = new_token(TOK_WORD, q, p); continue; } error_loc(p, 1, "invalid token '%c'", *p); } cur->next = new_token(TOK_EOF, p, p); return head.next; } typedef enum { NOD_ADD, // + NOD_SUB, // - NOD_MUL, // * NOD_DIV, // / NOD_MOD, // % NOD_EQU, // == NOD_NEQ, // != NOD_LT, // < NOD_LTE, // <= NOD_GT, // > NOD_GTE, // >= NOD_AND, // & NOD_OR, // | NOD_XOR, // ^ NOD_NUM, // integer NOD_STRING, // "" NOD_ARRAY, // {} NOD_DEREF, // [a] NOD_EMPTY_STMT, NOD_BLOCK_STMT, NOD_EXPR_STMT, NOD_RETURN_STMT, NOD_BREAK_STMT, NOD_SLP_STMT, NOD_DBG_STMT, NOD_WRT_STMT, NOD_ERR_STMT, NOD_RED_STMT, NOD_ASSIGN_STMT, NOD_POSTFIX, NOD_IFELSE_STMT, NOD_LOOP_STMT, NOD_FUN, NOD_WORD, NOD_FNCALL, NOD_GLOBAL, NOD_DEFINE, NOD_LOCAL, NOD_REF, } NodeType; typedef struct Node Node; struct Node { NodeType type; Node *lhs; Node *rhs; Node *next; int val; char *loc; int len; }; static int node_size(const Node *node) { int size = 0; while (node != NULL) { size += 1; node = node->next; } return size; } static Node * node_tail(Node *node) { while (node != NULL && node->next != NULL) node = node->next; return node; } /* doesn't check for \0 in op beware */ static bool node_equal(const Node *node, const char *op) { return memcmp(node->loc, op, node->len) == 0; } static int node_find(const Node *node, Node *op) { int i = 0; while (node != NULL) { if (node->len == op->len && node_equal(node, op->loc)) return i; i += 1; node = node->next; } return -1; } static Node * node_get(Node *node, int idx) { while (idx--) node = node->next; return node; } static Node * new_node(NodeType type, Token *tok) { Node *const node = calloc(1, sizeof(Node)); if (node == NULL) error("calloc failed", 0); node->type = type; if (tok != NULL) { node->loc = tok->loc; node->len = tok->len; } return node; } static Node * new_word(Token *tok) { Node *const node = new_node(NOD_WORD, tok); return node; } static Node * new_binary(NodeType type, Token *tok, Node *lhs, Node *rhs) { Node *const node = new_node(type, tok); node->lhs = lhs; node->rhs = rhs; return node; } static Node * new_unary(NodeType type, Token *tok, Node *node) { return new_binary(type, tok, node, NULL); } static Node * new_num(int val, Token *tok) { Node *const node = new_node(NOD_NUM, tok); node->val = val; return node; } static Node *stmt(Token **rest, Token *tok); static Node *function(Token **rest, Token *tok); static void define(Token **rest, Token *tok); static void denum(Token **rest, Token *tok); static Node *global(Token **rest, Token *tok); static Node *local(Token **rest, Token *tok); static Node *expr(Token **rest, Token *tok); static Node *empty_stmt(Token **rest, Token *tok); static Node *block_stmt(Token **rest, Token *tok); static Node *expr_stmt(Token **rest, Token *tok); static Node *return_stmt(Token **rest, Token *tok); static Node *break_stmt(Token **rest, Token *tok); static Node *slp_stmt(Token **rest, Token *tok); static Node *dbg_stmt(Token **rest, Token *tok); static Node *wrt_stmt(Token **rest, Token *tok); static Node *err_stmt(Token **rest, Token *tok); static Node *red_stmt(Token **rest, Token *tok); static Node *postfix(Token **rest, Token *tok); static Node *assign_stmt(Token **rest, Token *tok); static Node *ifelse_stmt(Token **rest, Token *tok); static Node *loop_stmt(Token **rest, Token *tok); static Node *logical(Token **rest, Token *tok); static Node *equality(Token **rest, Token *tok); static Node *relational(Token **rest, Token *tok); static Node *add(Token **rest, Token *tok); static Node *mul(Token **rest, Token *tok); static Node *primary(Token **rest, Token *tok); static Node *locals; static int locals_size; static Node *strings; static Node *defines; static int strings_size; static Node * function(Token **rest, Token *tok) { Node *const node = new_node(NOD_FUN, tok); node->lhs = new_word(tok); tok = skip(tok->next, "("); Node head = {0}; Node *cur = &head; while (tok->type == TOK_WORD) { cur = cur->next = new_word(tok); tok = tok->next; if (!equal(tok, ")")) tok = skip(tok, ","); } node->lhs->next = head.next; tok = skip(tok, ")"); node->rhs = stmt(&tok, tok); *rest = tok; return node; } static void define(Token **rest, Token *tok) { tok = skip(tok, "define"); Node *store = new_word(tok); tok = skip(tok->next, "="); store->rhs = expr(&tok, tok); *rest = skip(tok, ";"); store->type = NOD_DEFINE; store->next = defines; defines = store; } static void denum(Token **rest, Token *tok) { tok = skip(tok, "enum"); Node *store = new_word(tok); int v = 0; store->rhs = new_num(v++, tok); store->type = NOD_DEFINE; store->next = defines; defines = store; tok = tok->next; while (tok->type != TOK_EOF && !equal(tok, ";")) { tok = skip(tok, ","); store = new_word(tok); store->rhs = new_num(v++, tok); store->type = NOD_DEFINE; store->next = defines; defines = store; tok = tok->next; } *rest = skip(tok, ";"); } static int const_expr(Node *node); static Node * global(Token **rest, Token *tok) { tok = skip(tok, "global"); expect(tok, TOK_WORD); Node head = {0}; Node *node = &head; while (tok->type != TOK_EOF && !equal(tok, ";")) { Node *const cur = new_word(tok); node = node->next = cur; cur->type = NOD_GLOBAL; tok = tok->next; if (equal(tok, "[")) { tok = tok->next; cur->lhs = expr(&tok, tok); tok = skip(tok, "]"); } if (equal(tok, "=")) { tok = tok->next; cur->val = const_expr(expr(&tok, tok)); } if (!equal(tok, ";")) tok = skip(tok, ","); } *rest = skip(tok, ";"); return head.next; } static Node * local(Token **rest, Token *tok) { tok = skip(tok, "local"); expect(tok, TOK_WORD); Node head = {0}; Node *node = &head; while (tok->type != TOK_EOF && !equal(tok, ";")) { Node *const cur = new_unary(NOD_LOCAL, tok, new_word(tok)); node = node->next = cur; tok = tok->next; if (equal(tok, "=")) { tok = tok->next; cur->rhs = expr(&tok, tok); } if (!equal(tok, ";")) tok = skip(tok, ","); } *rest = skip(tok, ";"); return head.next; } static Node * expr(Token **rest, Token *tok) { return logical(rest, tok); } static Node *logical(Token **rest, Token *tok) { Node *node = equality(&tok, tok); for (;;) { if (equal(tok, "&")) { node = new_binary(NOD_AND, tok, node, equality(&tok, tok->next)); continue; } if (equal(tok, "^")) { node = new_binary(NOD_XOR, tok, node, equality(&tok, tok->next)); continue; } if (equal(tok, "|")) { node = new_binary(NOD_OR, tok, node, equality(&tok, tok->next)); continue; } *rest = tok; return node; } } static Node *equality(Token **rest, Token *tok) { Node *node = relational(&tok, tok); for (;;) { if (equal(tok, "==")) { node = new_binary(NOD_EQU, tok, node, relational(&tok, tok->next)); continue; } if (equal(tok, "!=")) { node = new_binary(NOD_NEQ, tok, node, relational(&tok, tok->next)); continue; } *rest = tok; return node; } } static Node *relational(Token **rest, Token *tok) { Node *node = add(&tok, tok); for (;;) { if (equal(tok, "<")) { node = new_binary(NOD_LT, tok, node, add(&tok, tok->next)); continue; } if (equal(tok, "<=")) { node = new_binary(NOD_LTE, tok, node, add(&tok, tok->next)); continue; } if (equal(tok, ">")) { node = new_binary(NOD_GT, tok, node, add(&tok, tok->next)); continue; } if (equal(tok, ">=")) { node = new_binary(NOD_GTE, tok, node, add(&tok, tok->next)); continue; } *rest = tok; return node; } } // add = mul ("+" mul | "-" mul)* static Node * add(Token **rest, Token *tok) { Node *node = mul(&tok, tok); for (;;) { if (equal(tok, "+")) { node = new_binary(NOD_ADD, tok, node, mul(&tok, tok->next)); continue; } if (equal(tok, "-")) { node = new_binary(NOD_SUB, tok, node, mul(&tok, tok->next)); continue; } *rest = tok; return node; } } // mul = primary ("*" primary | "/" primary)* static Node * mul(Token **rest, Token *tok) { Node *node = primary(&tok, tok); for (;;) { if (equal(tok, "*")) { node = new_binary(NOD_MUL, tok, node, primary(&tok, tok->next)); continue; } if (equal(tok, "/")) { node = new_binary(NOD_DIV, tok, node, primary(&tok, tok->next)); continue; } if (equal(tok, "\\")) { node = new_binary(NOD_DIV, tok, primary(&tok, tok->next), node); } if (equal(tok, "%")) { node = new_binary(NOD_MOD, tok, node, primary(&tok, tok->next)); continue; } *rest = tok; return node; } } static Node * fncall(Token **rest, Token *tok) { Node *node = new_word(tok); tok = tok->next; node->type = NOD_FNCALL; /* args */ Node head = {0}; Node *cur = &head; if (equal(tok, "(")) { tok = skip(tok, "("); while (tok->type != TOK_EOF && !equal(tok, ")")) { cur = cur->next = expr(&tok, tok); if (!equal(tok, ")")) tok = skip(tok, ","); } tok = skip(tok, ")"); } *rest = tok; node->rhs = head.next; return node; } static Node * array(Token **rest, Token *tok) { Node *const node = new_node(NOD_ARRAY, tok); tok = skip(tok, "{"); Node *cur = node; while (tok->type != TOK_EOF && !equal(tok, "}")) { cur = cur->lhs = expr(&tok, tok); if (!equal(tok, "}")) tok = skip(tok, ","); } *rest = skip(tok, "}"); return node; } static Node * primary(Token **rest, Token *tok) { if (equal(tok, "(")) { Node *node = expr(&tok, tok->next); *rest = skip(tok, ")"); return node; } if (equal(tok, "[")) { Node *node = new_node(NOD_DEREF, tok); tok = tok->next; node->lhs = expr(&tok, tok); *rest = skip(tok, "]"); return node; } if (equal(tok, "&")) { Node *node = new_node(NOD_REF, tok); tok = tok->next; expect(tok, TOK_WORD); node->lhs = new_word(tok); *rest = tok->next; return node; } if (tok->type == TOK_NUM) { Node *node = new_num(tok->val, tok); *rest = tok->next; return node; } if (tok->type == TOK_STRING) { Node *node = new_node(NOD_STRING, tok); *rest = tok->next; return node; } if (equal(tok, "{")) return array(rest, tok); if (tok->type == TOK_WORD && equal(tok->next, "(")) { /* function call */ Node *node = fncall(&tok, tok); Node *cur = node; while (tok->type != TOK_EOF && equal(tok, "|>")) { tok = tok->next; cur = cur->lhs = fncall(&tok, tok); } *rest = tok; return node; } if (tok->type == TOK_WORD && (equal(tok->next, "++") || equal(tok->next, "--"))) return postfix(rest, tok); if (tok->type == TOK_WORD) { /* variable */ Node *node = new_word(tok); *rest = tok->next; return node; } error_loc(tok->loc, tok->len, "expected an expression"); return NULL; } static Node * stmt(Token **rest, Token *tok) { if (equal(tok, ";")) return empty_stmt(rest, tok); if (equal(tok, "{")) return block_stmt(rest, tok); if (equal(tok, "if")) return ifelse_stmt(rest, tok); if (equal(tok, "dbg")) return dbg_stmt(rest, tok); if (equal(tok, "red")) return red_stmt(rest, tok); if (equal(tok, "slp")) return slp_stmt(rest, tok); if (equal(tok, "wrt")) return wrt_stmt(rest, tok); if (equal(tok, "err")) return err_stmt(rest, tok); if (equal(tok, "loop")) return loop_stmt(rest, tok); if (equal(tok, "break")) return break_stmt(rest, tok); if (equal(tok, "=>") || equal(tok, "return")) return return_stmt(rest, tok); if (equal(tok, "[")) { int opn = 1; Token *cur = tok->next; while (cur->type != TOK_EOF && opn) { opn += equal(cur, "["); opn -= equal(cur, "]"); cur = cur->next; } if (cur->type == TOK_EOF) error("unexpected EOF", 0); if (equal(cur, "=")) return assign_stmt(rest, tok); } if (equal(tok->next, "=")) return assign_stmt(rest, tok); return expr_stmt(rest, tok); } static Node * empty_stmt(Token **rest, Token *tok) { *rest = skip(tok, ";"); return new_node(NOD_EMPTY_STMT, tok); } static Node * block_stmt(Token **rest, Token *tok) { skip(tok, "{"); if (equal(tok->next, "}")) { *rest = tok->next->next; return new_node(NOD_EMPTY_STMT, tok); } Node *const node = new_node(NOD_BLOCK_STMT, tok); tok = tok->next; Node head = {0}; Node *cur = &head; while (tok->type != TOK_EOF && equal(tok, "local")) { cur = cur->next = local(&tok, tok); while (cur->next != NULL) cur = cur->next; } while (tok->type != TOK_EOF && !equal(tok, "}")) cur = cur->next = stmt(&tok, tok); node->lhs = head.next; *rest = skip(tok, "}"); return node; } static Node * expr_stmt(Token **rest, Token *tok) { Node *const node = new_unary(NOD_EXPR_STMT, tok, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * return_stmt(Token **rest, Token *tok) { if (tok->type == TOK_PUNCT) tok = skip(tok, "=>"); else tok = skip(tok, "return"); Node *node; if (equal(tok, ";")) node = new_unary(NOD_RETURN_STMT, tok, new_num(0, NULL)); else node = new_unary(NOD_RETURN_STMT, tok, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * break_stmt(Token **rest, Token *tok) { *rest = skip(skip(tok, "break"), ";"); return new_node(NOD_BREAK_STMT, tok); } static Node * slp_stmt(Token **rest, Token *tok) { *rest = skip(skip(tok, "slp"), ";"); return new_node(NOD_SLP_STMT, tok); } static Node * dbg_stmt(Token **rest, Token *tok) { Token *const og = tok; tok = skip(tok, "dbg"); Node *const node = new_unary(NOD_DBG_STMT, og, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * wrt_stmt(Token **rest, Token *tok) { Token *const og = tok; tok = skip(tok, "wrt"); Node *const node = new_unary(NOD_WRT_STMT, og, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * err_stmt(Token **rest, Token *tok) { Token *const og = tok; tok = skip(tok, "err"); Node *const node = new_unary(NOD_ERR_STMT, og, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * red_stmt(Token **rest, Token *tok) { Token *const og = tok; tok = skip(tok, "red"); Node *const node = new_unary(NOD_RED_STMT, og, expr(&tok, tok)); *rest = skip(tok, ";"); return node; } static Node * postfix(Token **rest, Token *tok) { Token *const og = tok; Node *lhs; expect(tok, TOK_WORD); lhs = new_word(tok); tok = tok->next; expect(tok, TOK_PUNCT); Node *const node = new_binary(NOD_POSTFIX, og, lhs, new_word(tok)); *rest = tok->next; return node; } static Node * assign_stmt(Token **rest, Token *tok) { Node *lhs; if (equal(tok, "[")) { lhs = new_node(NOD_DEREF, tok); tok = tok->next; lhs->lhs = expr(&tok, tok); *rest = skip(tok, "]"); } else { expect(tok, TOK_WORD); lhs = new_word(tok); } skip(tok->next, "="); Node *const node = new_binary(NOD_ASSIGN_STMT, tok, lhs, expr(&tok, tok->next->next)); *rest = skip(tok, ";"); return node; } static Node * ifelse_stmt(Token **rest, Token *tok) { Node *const node = new_node(NOD_IFELSE_STMT, tok); tok = skip(tok, "if"); node->lhs = expr(&tok, tok); node->rhs = stmt(&tok, tok); if (equal(tok, "else")) { tok = tok->next; node->rhs->next = stmt(&tok, tok); } *rest = tok; return node; } static Node * loop_stmt(Token **rest, Token *tok) { Token *const og = tok; tok = skip(tok, "loop"); Node *const node = new_unary(NOD_LOOP_STMT, og, stmt(&tok, tok)); *rest = tok; return node; } // code generator static int depth; static void gen_expr(Node *node); static void gen_globaldec(Node *node); static void gen_localdec(Node *node); static void gen_fncall(Node *node); static void gen_variableget(Node *node); static void gen_deref(Node *node); static void gen_string(Node *node); static void gen_ref(Node *node); static void gen_postfix(Node *node); static void gen_expr(Node *node) { switch (node->type) { case NOD_NUM: printf("\tLIT %04x\n", node->val); depth += 1; return; case NOD_FNCALL: gen_fncall(node); return; case NOD_WORD: gen_variableget(node); return; case NOD_DEREF: gen_deref(node); return; case NOD_STRING: case NOD_ARRAY: gen_string(node); return; case NOD_REF: gen_ref(node); return; case NOD_POSTFIX: gen_postfix(node); return; default: break; } gen_expr(node->lhs); gen_expr(node->rhs); depth -= 1; const char *op = NULL; switch (node->type) { case NOD_ADD: op = "ADD"; break; case NOD_SUB: op = "SUB"; break; case NOD_MUL: op = "MUL"; break; case NOD_DIV: op = "DIV"; break; case NOD_MOD: op = "MOD"; break; case NOD_EQU: op = "EQU"; break; case NOD_NEQ: op = "NEQ"; break; case NOD_LT: op = "LTH"; break; case NOD_LTE: op = "LTE"; break; case NOD_GT: op = "GTH"; break; case NOD_GTE: op = "GTE"; break; case NOD_AND: op = "AND"; break; case NOD_OR: op = "ORA"; break; case NOD_XOR: op = "XOR"; break; default: break; } if (op == NULL) error_loc(node->loc, node->len, "invalid expression %d", node->type); printf("\t%s\n", op); } static void gen_globaldec(Node *node) { printf("@__gl_%.*s\n", node->len, node->loc); if (node->lhs != NULL) { printf("\t,__gla_%.*s\n", node->len, node->loc); printf("@__gla_%.*s\n", node->len, node->loc); const int len = const_expr(node->lhs); for (int i = 0; i < len; i++) printf("\t%04x\n", node->val); } else printf("\t%04x\n", node->val); } static void gen_localdec(Node *node) { if (node->rhs == NULL) return; const int found = node_find(locals, node->lhs); gen_expr(node->rhs); printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); printf("\tSTA\n"); depth -= 1; } static void gen_fncall(Node *node) { Node *cur = node->rhs; while (cur != NULL) { gen_expr(cur); depth -= 1; cur = cur->next; } printf("\tJRT ,__fn_%.*s\n", node->len, node->loc); depth += 1; if (node->lhs != NULL) { depth -= 1; gen_fncall(node->lhs); } } static int get_define(Node *node) { const int define_idx = node_find(defines, node); if (define_idx != -1) return const_expr(node_get(defines, define_idx)->rhs); error_loc(node->loc, node->len, "define '%.*s' doesn't exist", node->len, node->loc); return 0; } static int const_expr(Node *node) { Node *const lhs = node->lhs; Node *const rhs = node->rhs; switch (node->type) { case NOD_ADD: return const_expr(lhs) + const_expr(rhs); case NOD_SUB: return const_expr(lhs) - const_expr(rhs); case NOD_MUL: return const_expr(lhs) * const_expr(rhs); case NOD_DIV: return const_expr(lhs) / const_expr(rhs); case NOD_MOD: return const_expr(lhs) % const_expr(rhs); case NOD_EQU: return const_expr(lhs) == const_expr(rhs); case NOD_NEQ: return const_expr(lhs) != const_expr(rhs); case NOD_LT: return const_expr(lhs) < const_expr(rhs); case NOD_LTE: return const_expr(lhs) <= const_expr(rhs); case NOD_GT: return const_expr(lhs) > const_expr(rhs); case NOD_GTE: return const_expr(lhs) >= const_expr(rhs); case NOD_AND: return const_expr(lhs) & const_expr(rhs); case NOD_OR: return const_expr(lhs) | const_expr(rhs); case NOD_XOR: return const_expr(lhs) ^ const_expr(rhs); default: break; } switch (node->type) { case NOD_NUM: return node->val; case NOD_WORD: return get_define(node); default: break; } error_loc(node->loc, node->len, "const expr doesn't support %d expression", node->type); return 0; } static void gen_variableget(Node *node) { const int define_idx = node_find(defines, node); const int found = node_find(locals, node); if (define_idx != -1) printf("\tLIT %04x\n", (unsigned)const_expr(node_get(defines, define_idx)->rhs) % 0x10000); else if (found != -1) { printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); printf("\tLDA\n"); } else { printf("\tLIT ,__gl_%.*s\n", node->len, node->loc); printf("\tLDA\n"); } depth += 1; } static void gen_deref(Node *node) { gen_expr(node->lhs); printf("\tLDA\n"); } static void gen_string(Node *node) { Node *store = new_node(0, NULL); store->lhs = node; store->next = strings; strings = store; strings_size += 1; printf("\tLIT ,__arr_%x\n", strings_size - 1); depth += 1; } static void gen_ref(Node *node) { const int found = node_find(locals, node->lhs); if (found != -1) { printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); } else printf("\tLIT ,__gl_%.*s\n", node->lhs->len, node->lhs->loc); depth += 1; } static void gen_assign_stmt(Node *node) { gen_expr(node->rhs); if (node->lhs->type == NOD_DEREF) { gen_expr(node->lhs->lhs); printf("\tSTA\n"); depth -= 2; } else { const int found = node_find(locals, node->lhs); if (found != -1) { printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); } else printf("\tLIT ,__gl_%.*s\n", node->lhs->len, node->lhs->loc); printf("\tSTA\n"); depth -= 1; } } static void gen_postfix(Node *node) { const int found = node_find(locals, node->lhs); if (found != -1) { printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); } else printf("\tLIT ,__gl_%.*s\n", node->lhs->len, node->lhs->loc); printf("\tLDA\n"); printf("\tDUP\n"); depth += 1; if (node_equal(node->rhs, "++")) printf("\tINC\n"); else if (node_equal(node->rhs, "--")) printf("\tDEC\n"); if (found != -1) { printf("\tLIT ,__stack_ptr LDA\n"); if (locals_size - found - 1) printf("\tLIT %04x SUB\n", locals_size - found - 1); } else printf("\tLIT ,__gl_%.*s\n", node->lhs->len, node->lhs->loc); printf("\tSTA\n"); } static void gen_stmt(Node *node, Node *fname, int break_lbl) { static int label = 0; int lbl; switch (node->type) { case NOD_EMPTY_STMT: break; case NOD_BLOCK_STMT: node = node->lhs; while (node != NULL) { gen_stmt(node, fname, break_lbl); node = node->next; } break; case NOD_EXPR_STMT: gen_expr(node->lhs); printf("\tPOP\n"); depth -= 1; break; case NOD_RETURN_STMT: gen_expr(node->lhs); if (locals_size) printf("\tJMP ,__fnret_%.*s\n", fname->len, fname->loc); else printf("\tRET\n"); depth -= 1; break; case NOD_BREAK_STMT: if (break_lbl == -1) error_loc(node->loc, node->len, "break statement outside of loop"); printf("\tJMP ,__lbl_%x\n", break_lbl); break; case NOD_SLP_STMT: printf("\tSLP\n"); break; case NOD_DBG_STMT: gen_expr(node->lhs); printf("\tDBG\n"); printf("\tPOP\n"); depth -= 1; break; case NOD_WRT_STMT: gen_expr(node->lhs); printf("\tWRT\n"); depth -= 1; break; case NOD_ERR_STMT: gen_expr(node->lhs); printf("\tERR\n"); depth -= 1; break; case NOD_RED_STMT: printf("\tRED\n"); gen_expr(node->lhs); printf("\tSTA\n"); depth -= 1; break; case NOD_ASSIGN_STMT: gen_assign_stmt(node); break; case NOD_IFELSE_STMT: lbl = label; label += 1 + (node->rhs->next != NULL); gen_expr(node->lhs); printf("\tJEZ ,__lbl_%x\n", lbl); depth -= 1; gen_stmt(node->rhs, fname, break_lbl); if (node->rhs->next != NULL) { printf("\tJMP ,__lbl_%x\n", lbl + 1); printf("@__lbl_%x\n", lbl); gen_stmt(node->rhs->next, fname, break_lbl); lbl += 1; } printf("@__lbl_%x\n", lbl); break; case NOD_LOOP_STMT: lbl = label; label += 2; printf("@__lbl_%x\n", lbl); gen_stmt(node->lhs, fname, lbl + 1); printf("\tJMP ,__lbl_%x\n", lbl); printf("@__lbl_%x\n", lbl + 1); break; default: error_loc(node->loc, node->len, "unexpected %d", node->type); } } static void gen_function(Node *node) { Node *const ognode = node; printf("@__fn_%.*s\n", node->lhs->len, node->lhs->loc); Node head = {0}; if (node->lhs->next) head.next = node->lhs->next; const int argcount = node_size(head.next); Node *cur = node_tail(&head); node = node->rhs; if (node->type == NOD_BLOCK_STMT) node = node->lhs; Node localdecshead = {0}; Node *localdecs = &localdecshead; while (node != NULL && node->type == NOD_LOCAL) { cur = cur->next = node->lhs; localdecs = localdecs->next = node; node = node->next; } locals = head.next; locals_size = node_size(locals); if (locals_size) { printf("\tLIT ,__stack_ptr LDA\n"); printf("\tLIT %04x ADD\n", locals_size); printf("\tLIT ,__stack_ptr STA\n"); } /* TODO: optimize this using DUP */ cur = locals; for (int i = 0; i < argcount; i++) { printf("\tLIT ,__stack_ptr LDA\n"); const int offset = locals_size + i - argcount; if (offset) printf("\tLIT %04x SUB\n", offset); printf("\tSTA\n"); cur = cur->next; } for (cur = localdecshead.next; cur != NULL && cur->type == NOD_LOCAL; cur = cur->next) gen_localdec(cur); while (node != NULL) { gen_stmt(node, ognode->lhs, -1); node = node->next; } printf("\tLIT 0000\n"); if (locals_size) { printf("@__fnret_%.*s\n", ognode->lhs->len, ognode->lhs->loc); printf("\tLIT ,__stack_ptr LDA\n"); printf("\tLIT %04x SUB\n", locals_size); printf("\tLIT ,__stack_ptr STA\n"); } printf("\tRET\n"); } static void codegen(Node *node) { printf("\tJRT ,__fn_main\n"); printf("\tRET\n"); while (node != NULL) { if (node->type == NOD_GLOBAL) gen_globaldec(node); else gen_function(node); node = node->next; } for (int i = 0; i < strings_size; i++) { printf("@__arr_%x\n", strings_size - i - 1); if (strings->lhs->type == NOD_STRING) { for (int k = 1; k < strings->lhs->len - 1;) { char *q = strings->lhs->loc + k; const int c = escaped_char(q, &q); if (c == '"') /* TODO: handle this */ error_loc(q - 2, 2, "can't escape double quotes", 0); printf("\t%04x\n", (unsigned)c); k = q - strings->lhs->loc; } } else { Node *cur = strings->lhs->lhs; while (cur != NULL) { printf("\t%04x\n", const_expr(cur)); cur = cur->lhs; } } printf("\t0000\n"); strings = strings->next; } printf("@__stack_ptr\n"); printf("\t,__stack\n"); printf("@__stack\n"); } // program = stmt* static Node * parse(Token *tok) { Node head = {0}; Node *cur = &head; while (tok->type != TOK_EOF) { if (equal(tok, "define")) { define(&tok, tok); continue; } if (equal(tok, "enum")) { denum(&tok, tok); continue; } if (equal(tok, "global")) { cur = cur->next = global(&tok, tok); while (cur->next != NULL) cur = cur->next; continue; } cur = cur->next = function(&tok, tok); } return head.next; } static char * drain(FILE *fp) { if (fseek(fp, 0, SEEK_END) < 0) error("fseek failed", 0); const size_t size = ftell(fp); if (fseek(fp, 0, SEEK_SET) < 0) error("fseek failed", 0); char *data = malloc(size + 1); if (data == NULL) error("malloc failed", 0); if (fread(data, 1, size, fp) != size) error("fread failed", 0); data[size] = '\0'; return data; } int main(int argc, char **argv) { if (argc != 2) error("usage: %s file", argv[0]); FILE *const fp = fopen(argv[1], "rb"); if (fp == NULL) error("%m: '%s'", argv[1]); source = drain(fp); fclose(fp); phase = "lex"; Token *const tok = tokenize(source); phase = "parse"; Node *node = parse(tok); phase = "codegen"; codegen(node); assert(depth == 0); return 0; }