--- /dev/null
+/* Reserved words recognised by the lexer. Each entry is a NUL-terminated
+ * string stored in a fixed 16-byte slot; every word appears exactly once. */
+static const char keywords[][16] = {
+  "return", "typedef", "Test",
+  "fun", "new", "union", "enum",
+  "spork", "fork",
+  "class", "extends", "dtor",
+  "operator",
+  "global", "private", "protect", "static", "const",
+  "if", "else", "break", "continue", "while", "do", "until",
+  "repeat", "for", "goto", "switch", "case",
+  "typeof", "auto",
+  "__file__", "__line__",
+};
--- /dev/null
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "gwion_util.h"
+#include "keyword.h"
+#include "token.h"
+#include "lexer.h"
+#include "tdop.h"
+
+/* Advance the scan cursor and the column counter by one character.
+ * Deliberately a bare brace block: call sites use it with and without a
+ * trailing ';' and as the body of a do/while loop. Expects a `lex` in scope. */
+#define ADV { ++lex->right; ++lex->pos; }
+/* Element count of a true array (not a pointer). Fully parenthesised so the
+ * expansion stays correct inside larger expressions. */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/* Scanner state threaded through every lexer helper. */
+typedef struct Lexer_ {
+  char * str;            /* text being scanned */
+  struct TrieNode *root; /* keyword trie handed to search() */
+  int pos;               /* column counter: bumped by ADV, reset to 1 on '\n' */
+  int line;              /* line counter: bumped on every '\n' */
+  char instring;         /* opening quote while inside a literal, else 0 */
+  int left;              /* index in str where the current lexeme starts */
+  int right;             /* index in str of the scan cursor */
+  bool escape;           /* result of is_escape() for the current character */
+} Lexer;
+
+/* Line bookkeeping: when `ch` is a newline, bump the line counter and reset
+ * the column counter to 1; any other character leaves `lex` untouched. */
+static inline void nl(Lexer *lex, char ch) {
+  if(ch != '\n')
+    return;
+  lex->line += 1;
+  lex->pos = 1;
+}
+
+/* True when `ch` is a backslash and the lexer's escape flag is not
+ * currently set. */
+static inline bool is_escape(Lexer *lex, char ch) {
+  if(lex->escape)
+    return false;
+  return ch == '\\';
+}
+
+// Returns 'true' if the character is an OPERATOR.
+// A newline is classified as an operator character as well.
+static bool isOperator(const char ch) {
+  switch(ch) {
+    case '+': case '-': case '*': case '/':
+    case '<': case '=': case '?': case '>':
+    case ':': case '$': case '%': case '~':
+    case '^': case '&': case '!':
+    case '\n':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Returns 'true' if the character is a DELIMITER: blanks, brackets,
+// backslash, '.', ',', ';', newline, or anything isOperator() accepts.
+static bool isDelimiter(char ch)
+{
+  switch(ch) {
+    case ' ': case '\r': case '\t':
+    case '(': case ')': case '[':
+    case ']': case '{': case '}':
+    case '\\':
+    case '.': case '\n': case ',': case ';':
+      return true;
+    default:
+      return isOperator(ch);
+  }
+}
+
+// Returns 'true' if the string may start an IDENTIFIER.
+// Only the first character is inspected: it must be neither a decimal digit
+// nor a delimiter.
+ANN static bool validIdentifier(const char* str) {
+  const char first = *str;
+  const bool leading_digit = first >= '0' && first <= '9';
+  return !leading_digit && !isDelimiter(first);
+}
+
+// Returns 'true' if the string is an INTEGER: one or more decimal digits,
+// optionally followed by exactly one suffix character (u, U, l or L).
+// The empty string and a lone suffix character are rejected.
+bool isInteger(char* str) {
+  const size_t length = strlen(str);
+  if(length == 0) // nothing to classify; also avoids indexing str[-1]
+    return false;
+  const size_t last = length - 1;
+  // every character before the last one must be a digit
+  for(size_t i = 0; i < last; i++) {
+    // cast: <ctype.h> functions require an unsigned char value
+    if(!isdigit((unsigned char)str[i]))
+      return false;
+  }
+  const char tail = str[last];
+  // a suffix only counts when at least one digit precedes it
+  if(last > 0 && (tail == 'u' || tail == 'U' || tail == 'l' || tail == 'L'))
+    return true;
+  return isdigit((unsigned char)tail) != 0;
+}
+
+// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
+// freshly allocated, NUL-terminated buffer owned by the caller.
+// An inverted range (right < left) yields an empty string.
+// Returns NULL when the allocation fails.
+char* subString(char* str, int left, int right) {
+  const size_t count = right >= left ? (size_t)(right - left) + 1 : 0;
+  char* subStr = malloc(count + 1);
+  if(!subStr)
+    return NULL;
+  if(count)
+    memcpy(subStr, str + left, count);
+  subStr[count] = '\0';
+  return subStr;
+}
+
+/* Returns `ch` when it is a single or double quote and the lexer's escape
+ * flag is clear; returns 0 otherwise. */
+static inline char isstring(Lexer *lex, const char ch) {
+  const bool quote = ch == '"' || ch == '\'';
+  return (quote && !lex->escape) ? ch : 0;
+}
+
+/* Keyword-trie helpers implemented in the trie source file. */
+struct TrieNode* drive(const Vector);
+void free_node(struct TrieNode*);
+/* Return type must match the definition, which returns int. */
+int search(struct TrieNode *const restrict root, const char *key);
+
+// Scans lex->str from the previous cursor position, prints a classification of the next lexeme to stdout and returns a Token for it.
+// Returns an all-zero Token for an invalid identifier and when the loop ends with the cursor at or past the end of the input.
+Token lexer_consume(Lexer *lex) {
+ lex->left = lex->right; // a new lexeme starts where the previous one stopped
+ int len = strlen(lex->str);
+ while (lex->right <= len && lex->left <= lex->right) { // '<=' lets the terminating NUL be examined too
+ nl(lex, lex->str[lex->right]);
+ lex->escape = is_escape(lex, lex->str[lex->right]); // NOTE(review): recomputed from the current character, so it is already false when that character is a quote
+ char isstr = isstring(lex, lex->str[lex->right]);
+ if(isstr) {
+// NOTE(review): either quote character closes the literal; the opening quote kind is not compared (disabled idea: isstr == instring)
+ if(lex->instring) {
+ char* subStr = subString(lex->str, lex->left, lex->right);
+ printf("'%s' IS A VALID %s\n", subStr,
+ lex->instring == '"' ? "STRING" : "CHAR");
+xfree(subStr);
+ lex->instring = 0;
+ADV;
+if(lex->escape) // NOTE(review): looks unreachable, escape is false whenever isstr is non-zero
+ADV;
+return (Token){ .type='s', .value="s"}; // string or char
+ } else
+ lex->instring = isstr; // remember which quote opened the literal
+ ADV
+ continue;
+ } else if(lex->instring) { // inside a literal: swallow the character
+ ADV
+ continue;
+ }
+ if(isDelimiter(lex->str[lex->right]) == false)
+ ADV;
+ if(isDelimiter(lex->str[lex->right]) == true && lex->left == lex->right) { // lexeme starts on a delimiter
+ if(isOperator(lex->str[lex->right]) == true) {
+ do ADV while (isOperator(lex->str[lex->right]) == true); // consume the whole run of operator characters
+ char* subStr = subString(lex->str, lex->left, lex->right - 1);
+ printf("'%s' IS AN OPERATOR\n", subStr);
+ // no extra ADV here: the cursor already sits just past the run
+ xfree(subStr);
+ return (Token){ .type=lex->str[lex->right-1], .value="s" }; // type is the last character of the operator run
+ } else {
+ char* subStr = subString(lex->str, lex->left, lex->right);
+ const char type = lex->str[lex->right];
+ printf("'%s' IS A DELIMITER %c\n", subStr, lex->str[lex->right]);
+ADV;
+ xfree(subStr);
+ return (Token){ .type=type, .value="s" }; // type is the delimiter character itself
+ }
+ lex->left = lex->right; // unreachable: both branches above return
+ } else if ((isDelimiter(lex->str[lex->right]) == true && lex->left != lex->right)
+ || (lex->right == len && lex->left != lex->right)) { // a word ended at a delimiter or at end of input
+ char* subStr = subString(lex->str, lex->left, lex->right - 1);
+ if(search(lex->root, subStr)) {
+ printf("'%s' IS A KEYWORD\n", subStr);
+xfree(subStr);
+// no token is returned for a keyword (a 'k' token return is disabled here);
+/* scanning resumes after the word */
+
+ } else if (isInteger(subStr) == true) {
+ printf("'%s' IS AN INTEGER\n", subStr);
+ xfree(subStr);
+ return (Token){ .type='i', .value="s" }; // integer literal
+ } else if (validIdentifier(subStr) == true
+ && isDelimiter(lex->str[lex->right - 1]) == false) {
+ printf("'%s' IS A VALID IDENTIFIER\n", subStr);
+ xfree(subStr);
+ return (Token){ .type='$', .value="id" };
+ } else if (validIdentifier(subStr) == false
+ && isDelimiter(lex->str[lex->right - 1]) == false) {
+ printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
+ xfree(subStr);
+ return (Token){};
+ } else
+ xfree(subStr);
+ lex->left = lex->right; // reached for keywords and unclassified words: start the next lexeme here
+ }
+ }
+ return lex->right < len ? (Token){ .type= 'c' } : (Token){}; // all-zero token once the cursor is at or past the end
+}
+
+/* Entry point: lexes and parses the text given as the first command-line
+ * argument, then prints the final line and column counters.
+ * Exits with EXIT_FAILURE and a usage message when no argument is given. */
+int main(int argc NUSED, char **argv) {
+  /* explicit check instead of assert(): must survive NDEBUG builds */
+  if(argc < 2 || !argv[1]) {
+    fputs("usage: lexer <source-text>\n", stderr);
+    return EXIT_FAILURE;
+  }
+  Lexer lex = { .line=1, .pos=1, .str = argv[1] };
+  struct Vector_ keys;
+  vector_init(&keys);
+  for(size_t i = 0; i < ARRAY_SIZE(keywords); i++)
+    vector_add(&keys, (vtype)keywords[i]);
+  lex.root = drive(&keys); // build the keyword trie
+  Parser p = { .lex=&lex };
+  // keep parsing until tdop hands back the all-zero sentinel token
+  Token t;
+  do { t = tdop(&p, 0); }
+  while(memcmp(&t, &empty_token, sizeof(Token)));
+  printf("%i lines %i pos\n", lex.line, lex.pos);
+  free_node(lex.root);
+  vector_release(&keys);
+  return (0);
+}
--- /dev/null
+#ifndef __LEXER
+#define __LEXER
+#include "token.h" /* Token, used by the prototype below */
+
+/* Lexer state; the structure itself is defined in the lexer source file. */
+struct Lexer_;
+typedef struct Lexer_ Lexer;
+
+/* Scans and returns the next token from `lex`. */
+Token lexer_consume(Lexer *lex);
+#endif
+
--- /dev/null
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include "token.h"
+#include "lexer.h"
+#include "tdop.h"
+#define T_VAR 1 // not referenced in the visible part of this file
+
+static struct TokenDesc tokens[256]; // tentative definition; the initialised table appears further down
+
+Token tdop(Parser* p, int rbp); // forward declaration: the handlers below call tdop recursively
+// empty_token is not defined in this file; presumably it comes from the included tdop.h
+
+/* Handler named for infix '+' (its table entry is currently commented out):
+ * traces its own name, then parses and returns the right-hand expression
+ * with a binding power of 100. */
+static Token led_add(Parser* p){
+  puts(__func__);
+  return tdop(p, 100);
+}
+
+/* Handler named for prefix '-': traces its own name and returns the token
+ * currently held in p->self without parsing an operand. */
+static Token nud_minus(Parser* p){
+  const Token current = p->self;
+  puts(__func__);
+  return current;
+}
+
+/* Handler named for prefix '*': traces its own name, then parses and
+ * returns the following expression with a binding power of 100. */
+static Token nud_mul(Parser* p){
+  puts(__func__);
+  return tdop(p, 100);
+}
+
+/* Handler named for infix '*': traces its own name. Parsing of the
+ * right-hand operand is not implemented yet, so the left operand is handed
+ * back unchanged. A non-void function must return a value: the caller
+ * stores the result in p->left. */
+static Token led_mul(Parser* p){
+  puts(__func__);
+  return p->left;
+}
+
+/* Handler named for infix '-': prints a trace line, then parses and returns
+ * the right-hand expression with a binding power of 100. */
+static Token led_minus(Parser* p){
+  // the format has no conversion, so no extra argument is passed
+  printf("is it binary minus?\n");
+  return tdop(p, 100);
+}
+
+/* Handler for '(': parses the enclosed expression and requires that the
+ * token which stopped it is ')'. Returns the enclosed expression's result.
+ * Prints a diagnostic and exits with status 1 when ')' is missing. */
+static Token nud_paren(Parser* p){
+  puts(__func__);
+  Token t = tdop(p, 0);
+  puts("here");
+  // tdop leaves the token that ended the sub-expression in p->self; the
+  // returned value is the sub-expression itself and is never ')'.
+  if(p->self.type != ')'){
+    // token values may be NULL, which must not reach %s
+    printf(" missing ')' %c %s", t.type, t.value ? t.value : "(null)");
+    printf(" missing ')' %c %s", p->left.type,
+           p->left.value ? p->left.value : "(null)");
+    exit(1);
+  }
+  return t;
+}
+
+/* Dispatch table indexed by token type. Entries not listed are all-zero:
+ * binding power 0 and no handlers.
+ * Disabled entries kept for reference:
+ *   ['?'] = { 1, NULL, led_question }
+ *   ['>'] = { 2, NULL, led_implies }
+ *   ['v'] = { 3, NULL, led_or }
+ *   ['^'] = { 4, NULL, led_and }
+ *   ['~'] = { 0, nud_not, NULL }
+ *   ['+'] = { 10, nud_add, led_add }  or  { 10, NULL, led_add }
+ *   ['-'] = { 10, nud_minus, led_minus }
+ *   ['*'] = { 20, nud_mul, led_mul }
+ *   ['i'] = { 0, NULL, NULL }
+ */
+static struct TokenDesc tokens[256] = {
+  ['('] = { .lbp = 0, .nud = nud_paren, .led = NULL },
+  [')'] = { .lbp = 0, .nud = NULL, .led = NULL },
+};
+
+static Token vars[256]; // value table indexed by token type; zero-initialised (static storage) and only read in the visible code
+
+/* Fallback for tokens without a nud handler. A token that has no led
+ * handler either yields its slot from `vars`; a token that does have one
+ * is an operator with nothing before it: report it and exit with status 1. */
+static Token nonud(Parser* p){
+  printf("[%s] ...\n", __func__);
+  const uint8_t kind = p->self.type;
+  if(tokens[kind].led != NULL){
+    printf(" '%c' has no argument.\n", kind);
+    exit(1);
+  }
+  return vars[kind];
+}
+
+/* Top-down operator-precedence step.
+ * Consumes one token and evaluates it through its nud handler (or nonud),
+ * then keeps consuming tokens and applying their led handlers while their
+ * left binding power exceeds `rbp`. Returns p->left when a token with a
+ * binding power of `rbp` or less is met, or empty_token as soon as the
+ * lexer yields the all-zero token. Traces its progress on stdout. */
+Token tdop(Parser* p, const int rbp){
+  printf("[%s] %i\n", __func__, rbp);
+  p->self = lexer_consume(p->lex);
+  printf("start with %c\n", p->self.type);
+  if(memcmp(&p->self, &empty_token, sizeof(Token)) == 0)
+    return empty_token;
+  const struct TokenDesc *first = &tokens[p->self.type];
+  p->left = first->nud ? first->nud(p) : nonud(p);
+  while(1){
+    printf("consume %c\n", p->self.type);
+    p->self = lexer_consume(p->lex);
+    if(memcmp(&p->self, &empty_token, sizeof(Token)) == 0)
+      return empty_token;
+    const struct TokenDesc *next = &tokens[p->self.type];
+    if(rbp >= next->lbp)
+      break;
+    if(next->led)
+      p->left = next->led(p);
+  }
+  return p->left;
+}
--- /dev/null
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "token.h"
+/* Token type given to tokens built by generate(). */
+#define T_VAR 1
+
+/* Parser state for the character-driven parse() below. */
+struct Parser {
+  uint8_t* input; /* next unread byte of the NUL-terminated input */
+  Token left;     /* most recent result of a nud/led handler */
+  Token self;     /* token built from the byte being handled */
+};
+
+static struct TokenDesc tokens[256]; // tentative definition; the initialised table appears further down
+
+static Token parse (struct Parser* p, int rbp); // forward declaration: nud_add calls parse before its definition
+
+
+/* Handler for '+' (installed as both nud and led): prints "add", parses
+ * the following expression with a binding power of 6, prints that
+ * expression's text when it has one, and returns it. */
+static Token nud_add(struct Parser* p){
+  puts("add");
+  Token right = parse(p, 6);
+  // parse() can return a token without text; puts(NULL) is undefined
+  if(right.value)
+    puts(right.value);
+  return right;
+}
+
+/* Dispatch table indexed by input byte. Entries not listed are all-zero.
+ * Disabled entries kept for reference:
+ *   ['?'] = { 1, NULL, led_question }
+ *   ['('] = { 0, nud_paren, NULL }
+ *   [')'] = { 0, NULL, NULL }
+ *   ['>'] = { 2, NULL, led_implies }
+ *   ['v'] = { 3, NULL, led_or }
+ *   ['^'] = { 4, NULL, led_and }
+ *   ['~'] = { 0, nud_not, NULL }
+ */
+static struct TokenDesc tokens[256] = {
+  ['+'] = { .lbp = 1, .nud = nud_add, .led = nud_add },
+};
+
+static Token parse (struct Parser* p, int rbp); // repeats the earlier forward declaration
+static Token vars[256]; // value table indexed by token type; zero-initialised (static storage)
+
+/* Builds a fresh variable token: type T_VAR, no text, length 0.
+ * Allocating and filling the text buffer is not implemented. */
+static Token generate(void) {
+  Token fresh = { .type = T_VAR };
+  fresh.len = 0;
+  return fresh;
+}
+
+static Token nonud(struct Parser* p){ // fallback for tokens without a nud handler
+ if(tokens[p->self.type].led == NULL){
+ Token t = vars[p->self.type];
+// NOTE(review): no reachable statement stores into vars, so t.value looks always NULL and the branch below never runs
+ if(t.value) {
+t.value[t.len++] = *p->input; // appends the next input byte to the copy's text buffer
+// NOTE(review): parse() builds p->self with a NULL value, so the write below would dereference NULL
+p->self.value[p->self.len++] = *p->input;
+printf("[%s] %s:%s %i %i\n", __func__, p->self.value, t.value, p->self.len, p->self.type); // NOTE(review): %i is given a size_t (p->self.len)
+// the input cursor is not advanced here;
+// parsing restarts from the same position
+return parse(p, 0);
+return t; // unreachable: follows an unconditional return
+// (returning p->self was another option tried here)
+}
+ else {
+exit(3); // terminates the program; the rest of this branch is unreachable
+assert(p->self.type == T_VAR); // NOTE(review): <assert.h> is not among this file's visible includes
+if(*p->input >= 'a' && *p->input <= 'Z') { // NOTE(review): 'a' is greater than 'Z' in ASCII, so this range can never match
+ p->self.value[t.len++] = *p->input;
+printf("nonud (no value) %c %s\n", p->self.type, t.value);
+// (storing t back into vars was disabled here)
+return parse(p, 0);
+}
+// falls through to here when the range test above fails
+ vars[p->self.type] = t;
+ return t;
+ }
+ } else { // the token has a led handler but nothing precedes it
+ printf(" '%c' has no argument.\n", p->self.type);
+ exit(1);
+ }
+}
+
+/* Top-down operator-precedence step over raw input bytes.
+ * Consumes one byte and evaluates it through its nud handler (or nonud),
+ * then peeks at following bytes, consuming each one and applying its led
+ * handler while its left binding power exceeds `rbp`. Returns p->left when
+ * a byte with a binding power of `rbp` or less is met, or an all-zero
+ * token at the end of the input. Traces its progress on stdout. */
+static Token parse(struct Parser* p, int rbp){
+  printf("[%s] %c\n", __func__, *p->input);
+  if(*p->input == 0)
+    return (Token){};
+
+  uint8_t ch = *p->input++;
+  p->self = (Token){ ch };
+  struct TokenDesc* desc = &tokens[ch];
+  if(!desc->nud)
+    p->left = nonud(p);
+  else {
+    puts("has nud");
+    p->left = desc->nud(p);
+  }
+
+  while(1){
+    puts("start loop");
+    ch = *p->input;            // peek only; consumed further down
+    p->self = (Token){ ch };
+    desc = &tokens[ch];
+    if(ch == 0)
+      return (Token){};
+    puts("mark 0");
+    if(rbp >= desc->lbp)
+      break;
+    puts("mark 1");
+    p->input++;
+    if(desc->led)
+      p->left = desc->led(p);
+  }
+  puts("end of parse");
+  return p->left;
+}
+
+/* Reads one line (at most 255 characters) from stdin, echoes it, and runs
+ * the parser over it. Returns 1 when no line could be read, 0 otherwise. */
+int main(void){
+  char line[256];
+  if(fgets(line, sizeof line, stdin) == NULL)
+    return 1;
+
+  printf("Input: %s", line);
+
+  struct Parser parser = { .input = (uint8_t*)line };
+  parse(&parser, 0);
+
+  return 0;
+}
--- /dev/null
+#ifndef __Parser
+#define __Parser
+/* Parser state shared by tdop and its nud/led handlers. */
+typedef struct Parser_ {
+  Lexer *lex;  /* token source */
+  Token left;  /* most recent result of a nud/led handler */
+  Token self;  /* token most recently consumed */
+} Parser;
+
+/* Parse entry point, defined with external linkage in the parser source;
+ * declaring it static here would conflict with that definition. */
+Token tdop(Parser* p, int rbp);
+/* All-zero sentinel token; every including file gets its own copy. */
+static Token empty_token;
+#endif
--- /dev/null
+#ifndef __Parser
+#define __Parser
+typedef struct Parser_ { // parser state shared by tdop and its nud/led handlers
+ Lexer *lex; // token source
+// uint8_t* input;
+ Token left; // most recent result of a nud/led handler
+ Token self; // token most recently consumed
+} Parser;
+
+Token tdop(Parser* p, int rbp); // parse entry point; rbp is the right binding power
+static Token empty_token; // all-zero sentinel (static storage); every including file gets its own copy
+#endif
--- /dev/null
+#ifndef __TOKEN
+#define __TOKEN
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* uint8_t */
+
+struct Parser_;
+
+/* One lexical token. */
+typedef struct Token_ {
+  uint8_t type;  /* token kind; also the index into the dispatch table */
+  char* value;   /* token text; may be NULL */
+  size_t len;    /* length counter paired with value */
+} Token;
+
+/* Handler invoked by the parser for a token. */
+typedef Token (*SemanticCode) (struct Parser_*);
+
+/* Dispatch-table entry describing how the parser treats one token type. */
+struct TokenDesc {
+  int lbp;           /* left binding power */
+  SemanticCode nud;  /* handler when the token starts an expression */
+  SemanticCode led;  /* handler when the token follows a left operand */
+};
+
+#endif
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "gwion_util.h"
+
+// Number of child slots per node
+#define ALPHABET_SIZE (256)
+
+// Trie node: one child slot per symbol, plus the 1-based position of the
+// key ending at this node (0 when no key ends here).
+struct TrieNode {
+  struct TrieNode *children[ALPHABET_SIZE];
+  int idx;
+};
+
+// Pairs a trie node with an index; only the commented-out getRoot()
+// refers to it in the visible code.
+struct TrieRoot {
+  struct TrieNode *self; // node wrapped by this root
+  int idx;               // index stored alongside the node
+};
+
+// Allocates a trie node with idx 0 and every child slot set to NULL.
+// Returns NULL when the allocation fails.
+struct TrieNode *getNode(void) {
+  struct TrieNode *node = malloc(sizeof *node);
+  if(!node)
+    return NULL;
+  node->idx = 0;
+  struct TrieNode **const end = node->children + ALPHABET_SIZE;
+  for(struct TrieNode **slot = node->children; slot < end; ++slot)
+    *slot = NULL;
+  return node;
+}
+/*
+struct TrieRoot *getRoot(int idx) {
+ struct TrieRoot *root = (struct TrieRoot *)malloc(sizeof(struct TrieRoot));
+ root->self = getNode();
+ root->idx = idx;
+ return root;
+}
+*/
+// Walks `key` down from `root`, creating any missing node on the way, and
+// returns the node reached after the last character (`root` itself for an
+// empty key). The caller marks that node as a key end.
+// Returns NULL when a node cannot be allocated.
+static struct TrieNode* insert(struct TrieNode *const restrict root, const char *key) {
+  struct TrieNode *pCrawl = root;
+  for(const char *c = key; *c; ++c) {
+    // plain char may be signed: index with the unsigned byte value so
+    // bytes >= 0x80 cannot produce a negative subscript
+    const unsigned char index = (unsigned char)*c;
+    if(!pCrawl->children[index]) {
+      pCrawl->children[index] = getNode();
+      if(!pCrawl->children[index])
+        return NULL;
+    }
+    pCrawl = pCrawl->children[index];
+  }
+  return pCrawl;
+}
+
+// Looks `key` up in the trie rooted at `root`.
+// Returns the idx stored on the node where the key ends (non-zero for an
+// inserted key), or 0 when the path does not exist, the node marks no key,
+// or `root` is NULL.
+int search(struct TrieNode *const restrict root, const char *key) {
+  const struct TrieNode *n = root;
+  if(!n)
+    return 0;
+  for(const char *c = key; *c; ++c) {
+    // index with the unsigned byte value: plain char may be negative
+    n = n->children[(unsigned char)*c];
+    if(!n)
+      return 0;
+  }
+  return n->idx;
+}
+
+// Recursively frees `n` and every node below it. A NULL `n` is a no-op,
+// mirroring free().
+void free_node(struct TrieNode *n) {
+  if(!n)
+    return;
+  for(int i = 0; i < ALPHABET_SIZE; i++)
+    free_node(n->children[i]);
+  free(n);
+}
+
+
+// Driver: builds a trie holding every string in `v`. The node ending the
+// i-th string gets idx i + 1, so a later duplicate overrides an earlier one.
+// Returns NULL, with nothing left allocated, when a node allocation fails.
+struct TrieNode* drive(const Vector v) {
+  struct TrieNode *root = getNode();
+  if(!root)
+    return NULL;
+  const m_uint count = vector_size(v);
+  for(m_uint i = 0; i < count; ++i) {
+    struct TrieNode *n = insert(root, (char*)vector_at(v, i));
+    if(!n) {
+      free_node(root);
+      return NULL;
+    }
+    n->idx = i + 1;
+  }
+  return root;
+}