/* Number of elements in a true array (never pass a pointer).
 * The argument and the whole expansion are parenthesized so the macro stays a
 * single operand when it appears inside a larger expression such as
 * `x / ARRAY_SIZE(a)`. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// Returns 'true' if the string is an INTEGER.
//
// Accepted forms: one or more decimal digits, optionally followed by exactly
// one suffix character out of 'u', 'U', 'l', 'L' (the suffix needs at least
// one digit in front of it). The empty string is not an integer.
bool isInteger(char* str) {
  const size_t n = strlen(str);
  // Guard first: with n == 0 the "last character" index would be -1.
  if(n == 0)
    return false;
  const size_t last = n - 1;
  // Everything before the last character must be a digit.
  for(size_t i = 0; i < last; i++) {
    // <ctype.h> functions require a value representable as unsigned char.
    if(!isdigit((unsigned char)str[i]))
      return false;
  }
  const char tail = str[last];
  // A suffix only counts when at least one digit precedes it.
  if(last && (tail == 'u' || tail == 'U' || tail == 'l' || tail == 'L'))
    return true;
  return isdigit((unsigned char)tail) != 0;
}
// Scans the next lexeme of lex->str, starting where the previous call stopped
// (lex->left is reset to lex->right on entry), and returns a Token for it.
//
// Every classified lexeme is also reported on stdout. Token.value is only a
// placeholder literal ("s" or "id"); the lexeme text itself is freed before
// returning. Token.type is:
//   's'            a quoted string or char literal
//   <last char>    of a run of operator characters
//   <the char>     for any other single delimiter (this includes blanks)
//   'i'            an integer
//   '$'            a valid identifier
// A zeroed Token is returned for an invalid identifier and when the loop ends
// with lex->right >= len; { .type = 'c' } is returned if the loop ends earlier.
// Keywords are reported but produce no token: scanning simply continues
// after them.
Token lexer_consume(Lexer *lex) {
  lex->left = lex->right;
  int len = strlen(lex->str);
  while (lex->right <= len && lex->left <= lex->right) {
    nl(lex, lex->str[lex->right]); // line/pos bookkeeping
    // NOTE(review): escape is recomputed for the current character before
    // isstring() looks at it, so the flag set by a preceding backslash appears
    // to be already cleared when the quote that follows it is tested - confirm
    // whether escaped quotes are meant to be skipped.
    lex->escape = is_escape(lex, lex->str[lex->right]);
    char isstr = isstring(lex, lex->str[lex->right]);
    if(isstr) {
//    if(isstr && isstr == instring) {
      // Any unescaped quote closes the open literal; the kind of quote that
      // opened it is not compared (see the disabled condition above).
      if(lex->instring) {
        char* subStr = subString(lex->str, lex->left, lex->right);
        printf("'%s' IS A VALID %s\n", subStr,
          lex->instring == '"' ? "STRING" : "CHAR");
        xfree(subStr);
        lex->instring = 0;
        ADV; // step past the closing quote
        // isstring() only reports a quote while lex->escape is false, so this
        // extra advance is not taken on this path.
        if(lex->escape)
          ADV;
        return (Token){ .type='s', .value="s"}; // string or char
      } else
        lex->instring = isstr; // opening quote: remember which one
      ADV
      continue;
    } else if(lex->instring) {
      // inside a literal: swallow the character
      ADV
      continue;
    }
    // Ordinary character: extend the current lexeme by one.
    // NOTE(review): when right == len and left == right, the terminating NUL is
    // not a delimiter, so ADV moves right to len + 1 and the isDelimiter() call
    // just below reads str[len + 1], one byte past the terminator - confirm and
    // bound the read.
    if(isDelimiter(lex->str[lex->right]) == false)
      ADV;
    if(isDelimiter(lex->str[lex->right]) == true && lex->left == lex->right) {
      // The lexeme starts on a delimiter.
      if(isOperator(lex->str[lex->right]) == true) {
        // Consume the whole run of operator characters as one token.
        do ADV while (isOperator(lex->str[lex->right]) == true);
        char* subStr = subString(lex->str, lex->left, lex->right - 1);
        printf("'%s' IS AN OPERATOR\n", subStr);
        //ADV;
        xfree(subStr);
        // the token type is the last character of the run
        return (Token){ .type=lex->str[lex->right-1], .value="s" }; // string or char
      } else {
        char* subStr = subString(lex->str, lex->left, lex->right);
        const char type = lex->str[lex->right];
        printf("'%s' IS A DELIMITER %c\n", subStr, lex->str[lex->right]);
        ADV;
        xfree(subStr);
        return (Token){ .type=type, .value="s" }; // string or char
      }
      // not reached: both branches above return
      lex->left = lex->right;
    } else if ((isDelimiter(lex->str[lex->right]) == true && lex->left != lex->right)
      || (lex->right == len && lex->left != lex->right)) {
      // A word [left, right - 1] ended, either at a delimiter or at end of input.
      char* subStr = subString(lex->str, lex->left, lex->right - 1);
      if(search(lex->root, subStr)) {
        printf("'%s' IS A KEYWORD\n", subStr);
        xfree(subStr);
        // The return below is disabled, so no token is produced for a keyword;
        // control falls through to `lex->left = lex->right` and the loop goes on.
//return (Token){ .type='k', .value="s" }; // string or char
/*}*/

      } else if (isInteger(subStr) == true) {
        printf("'%s' IS AN INTEGER\n", subStr);
        xfree(subStr);
        return (Token){ .type='i', .value="s" }; // string or char
      } else if (validIdentifier(subStr) == true
        && isDelimiter(lex->str[lex->right - 1]) == false) {
        printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        xfree(subStr);
        return (Token){ .type='$', .value="id" };
      } else if (validIdentifier(subStr) == false
        && isDelimiter(lex->str[lex->right - 1]) == false) {
        printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        xfree(subStr);
        return (Token){}; // same value as the end-of-input token
      } else
        xfree(subStr);
      lex->left = lex->right; // start the next lexeme after this word
    }
  }
  return lex->right < len ? (Token){ .type= 'c' } : (Token){};
}
+ +static Token led_add(Parser* p){ + puts(__func__); + const Token right = tdop(p, 100); + return right; +} + +static Token nud_minus(Parser* p){ + puts(__func__); +// const Token right = tdop(p, 10); + return p->self; +} + +static Token nud_mul(Parser* p){ + puts(__func__); + const Token right = tdop(p, 100); + return right; +} + +static Token led_mul(Parser* p){ + puts(__func__); +// const Token right = tdop(p, 10); +//printf("%c %s\n", right.type, right.value); +// return right; +} + +static Token led_minus(Parser* p){ +//exit(3); + printf("is it binary minus?\n", __func__); + const Token right = tdop(p, 100); + return right; +} + +static Token nud_paren(Parser* p){ +puts(__func__); + Token t = tdop(p, 0); +puts("here"); +// if(p->self.type != ')'){ +// if(p->left.type != ')'){ + if(t.type != ')'){ + printf(" missing ')' %c %s", t.type, t.value); + printf(" missing ')' %c %s", p->left.type, p->left.value); + exit(1); + } else { +// t = tdop(p, 0); +// p->input++; + } + return t; +} + +static struct TokenDesc tokens[256] = { +// ['?'] = { 1, NULL , led_question }, + ['('] = { 0, nud_paren, NULL }, + [')'] = { 0, NULL, NULL }, +// ['>'] = { 2, NULL , led_implies }, +// ['v'] = { 3, NULL , led_or }, +// ['^'] = { 4, NULL , led_and }, +// ['~'] = { 0, nud_not , NULL }, +// ['+'] = { 10, nud_add, led_add }, +// ['+'] = { 10, NULL, led_add }, +// ['-'] = { 10, nud_minus, led_minus }, +// ['*'] = { 20, nud_mul, led_mul }, +// ['i'] = { 0, NULL, NULL }, +}; + +static Token vars[256]; + +static Token nonud(Parser* p){ + printf("[%s] ...\n", __func__); + if(tokens[p->self.type].led == NULL){ + Token t = vars[p->self.type]; +// if(t.value) +// assert(t.value); + return t; +// else { +// t = generate(); +// vars[p->self.type] = t; +// return t; +// } + } else { + printf(" '%c' has no argument.\n", p->self.type); + exit(1); + } +// exit(3); +} + +Token tdop(Parser* p, const int rbp){ +printf("[%s] %i\n", __func__, rbp); + p->self = lexer_consume(p->lex); +printf("start with 
%c\n", p->self.type); + if(!memcmp(&p->self, &empty_token, sizeof(Token))) + return empty_token; + struct TokenDesc* d = tokens + p->self.type; + if(d->nud) + p->left = d->nud(p); + else + p->left = nonud(p); + for(;;){ +printf("consume %c\n", p->self.type); + p->self = lexer_consume(p->lex); + if(!memcmp(&p->self, &empty_token, sizeof(Token))) + return empty_token; + struct TokenDesc * d = tokens + p->self.type; + + if(rbp >= d->lbp) + break; + +// p->input++; + + if(d->led) + p->left = d->led(p); + } + return p->left; +} diff --git a/ast2/pratt.c.bak b/ast2/pratt.c.bak new file mode 100644 index 00000000..2467b141 --- /dev/null +++ b/ast2/pratt.c.bak @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include "token.h" +#define T_VAR 1 + +struct Parser { + uint8_t* input; + Token left; + Token self; +}; + +static struct TokenDesc tokens[256]; + +static Token parse (struct Parser* p, int rbp); + + +static Token nud_add(struct Parser* p){ +puts("add"); +// puts(p->left.value); +Token right = parse(p, 6); + puts(right.value); + return right; +//boole(0b0111, p->left, right); +} + +static struct TokenDesc tokens[256] = { +/* + ['?'] = { 1, NULL , led_question }, + ['('] = { 0, nud_paren, NULL }, + [')'] = { 0, NULL , NULL }, + ['>'] = { 2, NULL , led_implies }, + ['v'] = { 3, NULL , led_or }, + ['^'] = { 4, NULL , led_and }, + ['~'] = { 0, nud_not , NULL }, +*/ + ['+'] = { 1, nud_add , nud_add }, + +}; + +static Token parse (struct Parser* p, int rbp); +static Token vars[256]; + +static Token generate(void) { +// static int k = 1; + + Token t = { + .type = T_VAR, +// .value = calloc(k*2+1, 1), +.len = 0 + }; + +// memset(t.value + 0, '0', k); +// memset(t.value + k, '1', k); + +// k <<= 1; + + return t; +} + +static Token nonud(struct Parser* p){ + if(tokens[p->self.type].led == NULL){ + Token t = vars[p->self.type]; +// if(t.value != '\0') { + if(t.value) { +t.value[t.len++] = *p->input; +//exit(3); +p->self.value[p->self.len++] = *p->input; +printf("[%s] 
%s:%s %i %i\n", __func__, p->self.value, t.value, p->self.len, p->self.type); +//exit(3); +//p->self = +return parse(p, 0); +return t; +//return p->self; +} + else { +exit(3); +assert(p->self.type == T_VAR); +if(*p->input >= 'a' && *p->input <= 'Z') { + p->self.value[t.len++] = *p->input; +printf("nonud (no value) %c %s\n", p->self.type, t.value); +// vars[p->self.type] = t; +return parse(p, 0); +} +// else + vars[p->self.type] = t; + return t; + } + } else { + printf(" '%c' has no argument.\n", p->self.type); + exit(1); + } +} + +static Token parse(struct Parser* p, int rbp){ +printf("[%s] %c\n", __func__, *p->input); + if(*p->input == 0) + return (Token){}; + + uint8_t c = *p->input++; + p->self = (Token){ c }; +//p->self.value[0] = c; + +// struct token_desc* d = tokens + c; + struct TokenDesc* d = tokens + c; + if(d->nud){ +puts("has nud"); + p->left = d->nud(p); + } else { + p->left = nonud(p); + } + + for(;;){ +puts("start loop"); + c = *p->input; + p->self = (Token){ c }; + d = tokens + c; + + if(c == 0) + return (Token){}; +puts("mark 0"); + if(rbp >= d->lbp) // orig + break; + +puts("mark 1"); + p->input++; + + if(d->led) + p->left = d->led(p); + } +puts("end of parse"); + return p->left; +} + +int main(void){ + char buf[256]; + if(!fgets(buf, sizeof(buf), stdin)) + return 1; + + printf("Input: %s", buf); + + struct Parser p = { + .input = (uint8_t*)buf, + }; + + parse(&p, 0); + + return 0; +} diff --git a/ast2/pratt.h b/ast2/pratt.h new file mode 100644 index 00000000..72a9b0fe --- /dev/null +++ b/ast2/pratt.h @@ -0,0 +1,12 @@ +#ifndef __Parser +#define __Parser +typedef struct Parser_ { + Lexer *lex; +// uint8_t* input; + Token left; + Token self; +} Parser; + +static Token tdop(Parser* p, int rbp); +static Token empty_token; +#endif diff --git a/ast2/tdop.h b/ast2/tdop.h new file mode 100644 index 00000000..ebf4d740 --- /dev/null +++ b/ast2/tdop.h @@ -0,0 +1,12 @@ +#ifndef __Parser +#define __Parser +typedef struct Parser_ { + Lexer *lex; +// uint8_t* 
input; + Token left; + Token self; +} Parser; + +Token tdop(Parser* p, int rbp); +static Token empty_token; +#endif diff --git a/ast2/token.h b/ast2/token.h new file mode 100644 index 00000000..3e7406ad --- /dev/null +++ b/ast2/token.h @@ -0,0 +1,19 @@ +#ifndef __TOKEN +#define __TOKEN + +struct Parser_; +typedef struct Token_ { + uint8_t type; + char* value; + size_t len; +} Token; + +typedef Token (*SemanticCode) (struct Parser_*); + +struct TokenDesc { + int lbp; + SemanticCode nud; + SemanticCode led; +}; + +#endif diff --git a/ast2/trie.c b/ast2/trie.c new file mode 100644 index 00000000..01994e0f --- /dev/null +++ b/ast2/trie.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include "gwion_util.h" + +// Alphabet size (# of symbols) +#define ALPHABET_SIZE (256) + +// trie node +struct TrieNode { + struct TrieNode *children[ALPHABET_SIZE]; + int idx; +}; + +struct TrieRoot { + struct TrieNode *self; + int idx; +}; + +// Returns new trie node (initialized to NULLs) +struct TrieNode *getNode(void) { + struct TrieNode *pNode = (struct TrieNode *)malloc(sizeof(struct TrieNode)); + if(pNode) { + pNode->idx = 0; + for(int i = 0; i < ALPHABET_SIZE; i++) + pNode->children[i] = NULL; + } + return pNode; +} +/* +struct TrieRoot *getRoot(int idx) { + struct TrieRoot *root = (struct TrieRoot *)malloc(sizeof(struct TrieRoot)); + root->self = getNode(); + root->idx = idx; + return root; +} +*/ +// If not present, inserts key into trie +// If the key is prefix of trie node, just marks leaf node +//static void insert(struct TrieNode *const root, const char *key) { +static struct TrieNode* insert(struct TrieNode *const restrict root, const char *key) { + const int length = strlen(key); + struct TrieNode *pCrawl = root; + for(int level = 0; level < length; level++) { + const int index = key[level]; + if(!pCrawl->children[index]) + pCrawl->children[index] = getNode(); + pCrawl = pCrawl->children[index]; + } + return pCrawl; +} + +// Returns true if key 
presents in trie, else false +int search(struct TrieNode *const restrict root, const char *key) { + const int length = strlen(key); + struct TrieNode *n = root; + for (int level = 0; level < length; level++) { + const int index = key[level]; + if(!n->children[index]) + return false; + n = n->children[index]; + } + return n ? n->idx : 0; +} + +void free_node(struct TrieNode *n) { + for (int i = 0; i < ALPHABET_SIZE; i++) { + if(n->children[i]) + free_node(n->children[i]); + } + free(n); +} + + +// Driver +struct TrieNode* drive(const Vector v) { + struct TrieNode *root = getNode(); + for(m_uint i = 0; i < vector_size(v); ++i) { + struct TrieNode *n = insert(root, (char*)vector_at(v, i)); + n->idx = i + 1; + } + return root; +} diff --git a/ast2/vgcore.20023 b/ast2/vgcore.20023 new file mode 100644 index 00000000..2ca4a47b Binary files /dev/null and b/ast2/vgcore.20023 differ