#ifndef CERC_LEX_H #define CERC_LEX_H #include #include #include "utf8.h" #define C_EOF UTF8_INVALID enum lexical_token { // Keywords T_ALIGN, T_AS, T_BREAK, T_CONST, T_CONTINUE, T_DEFER, T_ELSE, T_ENUM, T_EXTERN, T_FALSE, T_FOR, T_FUNC, T_IF, T_INCLUDE, T_LET, T_PUB, T_RETURN, T_STRUCT, T_TAGOF, T_TRUE, T_UNION, T_VAR, T_LAST_KEYWORD, // Builtin types T_BOOL, T_FLOAT32, T_FLOAT64, T_INT8, T_INT16, T_INT32, T_INT64, T_INT, T_UINT8, T_UINT16, T_UINT32, T_UINT64, T_UINT, T_LAST_BUILTIN_TYPE, // Operators T_ADD, T_ADDASSIGN, T_AND, T_ASSIGN, T_BAND, T_BANDASSIGN, T_BANG, T_BNOT, T_BOR, T_BORASSIGN, T_BSHL, T_BSHR, T_BXOR, T_BXORASSIGN, T_COLON, T_COMMA, T_DECR, T_DIV, T_DIVASSIGN, T_DOT, T_EQ, T_GE, T_GT, T_INCR, T_LBRACE, T_LBRACKET, T_LE, T_LPAREN, T_LT, T_MINUS, T_MODASSIGN, T_MODDELIM, T_MODULO, T_MULASSIGN, T_NEQ, T_OR, T_QUESTION, T_RBRACE, T_RBRACKET, T_RPAREN, T_SEMICOLON, T_STAR, T_SUBASSIGN, T_UNDERSCORE, T_LAST_OPERATOR, // Tokens with additional information T_COMMENT, T_NAME, T_NUMBER, T_CHAR, T_STRING, // Magic values T_EOF, T_NONE, }; extern const char *tokens[]; struct number { bool isfloat; union { uint64_t integer; double floatingpt; } value; }; struct location { const char *file; int line, column; }; struct token { enum lexical_token token; struct location loc; union { struct number num; unsigned int chr; const char *str; } info; }; struct lexer { FILE *in; uint32_t c[2]; char *buf; size_t bufsz, buflen; struct token un; struct location loc; }; void lex_init(struct lexer *lexer, FILE *f, const char *filename); void lex_finish(struct lexer *lexer); struct location lex_loc(struct lexer *lexer); enum lexical_token lex(struct lexer *lexer, struct token *out); enum lexical_token lex_any(struct lexer *lexer, struct token *out); void unlex(struct lexer *lexer, const struct token *in); bool match(struct lexer *lexer, enum lexical_token token); #endif