/* Iparser -- An interpretting parser Copyright (C) 2001 Frans Faase This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. GNU General Public License: http://www.iwriteiam.nl/GNU.txt Description: This program implements a parser with the following unique features: - It is an interpretting parser, meaning that on the input it expects at least two arguments, namely: - a filename of a file with a grammer, and - a filename of a file to be parsed according to the grammar. - Implements a "greedy" back-tracking parser, thus supporting a wide range of grammers. Can deal with many ambigious grammers. - Uses a smart caching algorithm for high performance. - Grammar supports keywords like OPT, LIST, SEQ and CHAIN. - Grammer supports common C-like non-terminals. - Scanner is controlled by parser, instead of being implemented as a first pass. This allows context-sensitve scanning. - Has output option to implement a parser with "hard-coded" grammar. So, you don't need to supply the first argument anymore. - Builds a abstact syntax tree from the parsed input based on a simple name giving strategy. - extremely small code base. (This file contains it all.) This parser was developed as part of the Meta-Meta environment. See http://www.iwriteiam.nl/MM.html */ #define VERSION "1.0 of September 14, 2001." #include #include #include #include #include #ifndef NULL #define NULL 0 #endif typedef int bool; #define TRUE 1 #define FALSE 0 typedef signed long longint; typedef unsigned long longword; typedef unsigned short word; typedef unsigned char byte; #define MALLOC(t) (t*)malloc(sizeof(t)) #define MALLOC_N(N,T) (T *)malloc((N)*sizeof(T)) #define STR_MALLOC(N) (char *)malloc(N+1) #define REALLOC_N(A,N,T) (T *)realloc(A, (N)*sizeof(T)) #define STRCPY(D,S) D = (char *)malloc(strlen(S)+1); strcpy(D,S) /*** Strings ***/ typedef struct hexa_hash_tree_T { byte state; union { char *string; struct hexa_hash_tree_T **children; } data; } hexa_hash_tree_t; byte *keyword_state = NULL; char *string(char *s) { static hexa_hash_tree_t *hash_tree = NULL; hexa_hash_tree_t **r_node = &hash_tree; char *vs = s; int depth; int mode = 0; for (depth = 0; ; depth++) { hexa_hash_tree_t *node = *r_node; if (node == NULL) { node = MALLOC(hexa_hash_tree_t); node->state = 0; STRCPY(node->data.string, s); *r_node = node; keyword_state = &node->state; return node->data.string; } if (node->state != 255) { char *cs = node->data.string; hexa_hash_tree_t **children; word i, v = 0; if (*cs == *s && strcmp(cs+1, s+1) == 0) { keyword_state = &node->state; return node->data.string; } children = MALLOC_N(16, hexa_hash_tree_t*); for (i = 0; i < 16; i++) children[i] = NULL; i = strlen(cs); if (depth <= i) v = ((byte)cs[depth]) & 15; else if (depth <= i*2) v = ((byte)cs[depth-i-1]) >> 4; children[v] = node; node = MALLOC(hexa_hash_tree_t); node->state = 255; node->data.children = children; *r_node = node; } { word v; if (*vs == '\0') { v = 0; if (mode == 0) { mode = 1; vs = s; } } else if (mode == 0) v = ((word)*vs++) & 15; else v = ((word)*vs++) >> 4; r_node = &node->data.children[v]; } } } /*** Trees ***/ typedef struct tree_T { char *type; union { struct list_T *parts; char *str_value; int int_value; double double_value; char char_value; } c; longword refcount; } tree_t, *tree_p; typedef struct list_T { struct list_T *next; tree_p first; } list_t, *list_p; char *tt_str_value, *tt_int_value, *tt_list; /* Special strings, one for each terminal */ char *tt_ident = "identifier"; char *tt_str_value = "string value"; char *tt_int_value = "integer value"; char *tt_double_value = "double value"; char *tt_char_value = "char value"; char *tt_list = "list"; tree_p old_trees = NULL; list_p old_lists = NULL; long alloced_trees = 0; tree_p malloc_tree( void ) { tree_p new; if (old_trees) { new = old_trees; old_trees = (tree_p)old_trees->type; } else new = MALLOC(tree_t); new->refcount = 1; alloced_trees++; return new; } void free_list( list_p list ); void tree_release( tree_p *r_t ) { tree_p t = *r_t; if (t == NULL) return; alloced_trees--; t->refcount--; if (t->refcount == 0) { if ( t->type != tt_ident && t->type != tt_str_value && t->type != tt_int_value && t->type != tt_double_value && t->type != tt_char_value) { list_p list = t->c.parts; while (list != NULL) { list_p next = list->next; free_list(list); list = next; } } t->type = (char*)old_trees; old_trees = t; } *r_t = NULL; } void free_list( list_p list ) { tree_release(&list->first); list->next = old_lists; old_lists = list; } void tree_assign(tree_p *d, tree_p s) { tree_p old_d = *d; *d = s; if (s != NULL) { s->refcount++; alloced_trees++; } if (old_d != NULL) tree_release(&old_d); } void tree_move(tree_p *d, tree_p *s) { tree_p old_d = *d; *d = *s; *s = NULL; if (old_d != NULL) tree_release(&old_d); } static list_p malloc_list( void ) { list_p new; if (old_lists) { new = old_lists; old_lists = old_lists->next; } else new = MALLOC(list_t); new->next = NULL; new->first = NULL; return new; } tree_p make_ident( char *str ) { tree_p tree = malloc_tree(); tree->type = tt_ident; tree->c.str_value = str; return tree; } tree_p make_str_atom( char *str ) { tree_p tree = malloc_tree(); tree->type = tt_str_value; tree->c.str_value = str; return tree; } tree_p make_double_atom( double value ) { tree_p tree = malloc_tree(); tree->type = tt_double_value; tree->c.double_value = value; return tree; } tree_p make_char_atom( char value ) { tree_p tree = malloc_tree(); tree->type = tt_char_value; tree->c.char_value = value; return tree; } tree_p make_int_atom( int value ) { tree_p tree = malloc_tree(); tree->type = tt_int_value; tree->c.int_value = value; return tree; } tree_p make_list( void ) { tree_p tree = malloc_tree(); tree->type = tt_list; tree->c.parts = NULL; return tree; } tree_p make_tree( char *name ) { tree_p tree = malloc_tree(); tree->type = name; tree->c.parts = NULL; return tree; } void insert_element( tree_p tree, tree_p element ) { list_p r_list; r_list = malloc_list(); tree_assign(&r_list->first, element); r_list->next = tree->c.parts; tree->c.parts = r_list; } void add_element( tree_p tree, tree_p element ) { list_p *r_list = &tree->c.parts; while (*r_list != NULL) r_list = &(*r_list)->next; *r_list = malloc_list(); tree_assign(&(*r_list)->first, element); } void drop_last_element( tree_p tree ) { list_p *r_list = &tree->c.parts; if (*r_list == NULL) return; while ((*r_list)->next != NULL) r_list = &(*r_list)->next; free_list(*r_list); *r_list = NULL; } inline list_p elements( tree_p tree ) { return tree->c.parts; } inline bool is_a_ident( tree_p tree ) { return tree && tree->type == tt_ident; } inline bool is_ident( tree_p tree, char *ident ) { return tree && tree->type == tt_ident && tree->c.str_value == ident; } inline bool equal_ident( tree_p tree, char *ident ) { return tree->c.str_value == ident; } inline bool is_a_str( tree_p tree ) { return tree && tree->type == tt_str_value; } inline bool is_str( tree_p tree, char *str ) { return tree && tree->type == tt_str_value && tree->c.str_value == str; } inline bool is_a_int( tree_p tree ) { return tree && tree->type == tt_int_value; } inline bool is_a_double( tree_p tree ) { return tree && tree->type == tt_double_value; } inline bool is_a_char( tree_p tree ) { return tree && tree->type == tt_char_value; } inline bool is_a_list( tree_p tree ) { return tree && tree->type == tt_list; } inline bool is_tree( tree_p tree, char *name ) { return tree && tree->type == name; } inline bool equal_tree( tree_p tree, char *name ) { return tree->type == name; } tree_p part( tree_p tree, int i ) { list_p parts = tree->c.parts; for (i--; parts && i > 0; parts = parts->next, i--); return parts ? parts->first : NULL; } int print_tree_depth = 0; void print_list(FILE *f, list_p list, bool compact); void print_tree( FILE *f, tree_p tree, bool compact ) { if (tree == NULL) { fprintf(f, "[NULL]"); } else if (is_a_ident(tree)) { fprintf(f, "%s", tree->c.str_value); } else if (is_a_str(tree)) { fprintf(f, "\"%s\"", tree->c.str_value); } else if (is_a_int(tree)) { fprintf(f, "%d", tree->c.int_value); } else if (is_a_double(tree)) { fprintf(f, "%f", tree->c.double_value); } else if (is_a_char(tree)) { fprintf(f, "'%c'", tree->c.char_value); } else { fprintf(f, "%s(", tree->type); print_tree_depth += strlen(tree->type) + 1; print_list(f, tree->c.parts, compact); print_tree_depth -= strlen(tree->type) + 1; fprintf(f, ")"); } if (print_tree_depth == 0 && !compact) fprintf(f, "\n"); }; void print_list(FILE *f, list_p list, bool compact) { list_p l; bool first = TRUE; for (l = list; l; l = l->next) { if (!first) { if (compact) fprintf(f, ","); else fprintf(f, ",\n%*s", print_tree_depth, ""); } first = FALSE; /* fprintf(f, "[%lx]", (longword)l); */ print_tree(f, l->first, compact); } } static void print_tree_rec( FILE *f, tree_p tree ); void print_tree_to_c( FILE *f, tree_p tree, char *name ) { fprintf(f, "void init_%s( tree_p *root )\n{ int v = 0;\n", name); fprintf(f, " char *keyword_v;\n"); fprintf(f, " int v = 0;\n"); fprintf(f, " tree_p tt[MAX_NR_TREE];\n"); fprintf(f, "\n"); fprintf(f, "#define ID(s) tt[v]=make_ident(string(s));\n"); fprintf(f, "#define STR(s) tt[v]=make_str_atom(string(s));\n"); fprintf(f, "#define KEYWORD(s) tt[v]=make_str_atom(string(s));" " *keyword_state |= 1;\n"); fprintf(f, "#define INT(i) tt[v]=make_int_atom(i);\n"); fprintf(f, "#define DOUBLE(i) tt[v]=make_double_atom(i);\n"); fprintf(f, "#define CHAR(i) tt[v]=make_char_atom(i);\n"); fprintf(f, "#define TREE(n) tt[v]=make_tree(string(n)); v++;\n"); fprintf(f, "#define LIST tt[v]=make_list(); v++;\n"); fprintf(f, "#define NONE tt[v]=NULL;\n"); fprintf(f, "#define SEP add_element(tt[v-1],tt[v]);\n"); fprintf(f, "#define CLOSE add_element(tt[v-1],tt[v]); v--;\n"); fprintf(f, "\n "); print_tree_rec( f, tree ); fprintf(f, "\n *root = tt[0];\n}\n\n"); } static void print_tree_rec( FILE *f, tree_p tree ) { if (tree == NULL) fprintf(f, "NONE "); else if (is_a_ident(tree)) { fprintf(f, "ID(\"%s\") ", tree->c.str_value); } else if (is_a_str(tree)) { fprintf(f, "KEYWORD(\"%s\") ", tree->c.str_value); } else if (is_a_int(tree)) { fprintf(f, "INT(%d) ", tree->c.int_value); } else if (is_a_double(tree)) { fprintf(f, "DOUBLE(%f) ", tree->c.double_value); } else if (is_a_char(tree)) { fprintf(f, "CHAR('%c') ", tree->c.char_value); } else { list_p l; if (is_a_list(tree)) fprintf(f, "LIST "); else fprintf(f, "TREE(\"%s\") ", tree->type); if (tree->c.parts) { print_tree_depth++; for (l = tree->c.parts; l; l = l->next) { print_tree_rec(f, l->first); if (l->next) fprintf(f, "SEP\n%*.*s ", print_tree_depth, print_tree_depth, ""); } print_tree_depth--; } fprintf(f, "CLOSE "); } }; /*** File IO ***/ char *info = NULL; longword file_pos; longword file_len; #define _eof (file_pos >= file_len) char *buffer = NULL; void _assign(FILE *the_file) { int fh = fileno(the_file); file_len = lseek(fh, 0L, SEEK_END); lseek(fh, 0L, SEEK_SET); buffer = (char*)malloc(file_len+2); file_len = read(fh, buffer, file_len); buffer[file_len] = '\0'; file_pos = 0; info = buffer; } void _unassign() { if (buffer) free(buffer); buffer = NULL; } inline void _set_file_pos(longword new_file_pos) { file_pos = new_file_pos; info = buffer + file_pos; } inline void _next() { if (file_pos < file_len) { file_pos++; info++; } } inline void _advance(word steps) { file_pos += steps; if (file_pos > file_len) file_pos = file_len; info = buffer + file_pos; } void _print_state() { printf("[0,%ld,%ld] = `%c'\n", file_pos, file_len, *info); } /*** Scanner ***/ int debug_scan = 0; char *current_nt; word start_line; word start_column; typedef struct { longword file_pos; word cur_line; word cur_column; } scan_pos_t; #define _DEBUG(S) #define _DEBUG_P1(S,A) #define _DEBUG_P2(S,A,B) #define _DEBUG_P3(S,A,B,C) #define _DEBUG_P4(S,A,B,C,D) #define DEBUG(S) if (debug_scan) printf(S) /*#define DEBUG_P1(S,A) if (debug_scan) printf(S,A)*/ #define DEBUG_P2(S,A,B) if (debug_scan) printf(S,A,B) #define DEBUG_P3(S,A,B,C) if (debug_scan) printf(S,A,B,C) #define DEBUG_P4(S,A,B,C,D) if (debug_scan) printf(S,A,B,C,D) #define DEBUG_P6(S,A,B,C,D,E,F) if (debug_scan) printf(S,A,B,C,D,E,F) #define DEBUG_F(S) if (debug_scan > 1) printf(S) #define DEBUG_F_P1(S,A) if (debug_scan > 1) printf(S,A) #define DEBUG_F_P2(S,A,B) if (debug_scan > 1) printf(S,A,B) #define DEBUG_F_P3(S,A,B,C) if (debug_scan > 1) printf(S,A,B,C) #define DEBUG_F_P4(S,A,B,C,D) if (debug_scan > 1) printf(S,A,B,C,D) #define DEBUG_F_P6(S,A,B,C,D,E,F) if (debug_scan > 1) printf(S,A,B,C,D,E,F) void skip_space(void); static char *start_info() { static char buff[13]; int i; char *s; for (i = 0, s = info; i < 10 && *s; s++) if (*s == '\n') { buff[i++] = '\\'; buff[i++] = 'n'; } else if (*s == '\t') { buff[i++] = '\\'; buff[i++] = 't'; } else buff[i++] = *s; buff[i] = '\0'; return buff; } word cur_line = 1; word cur_column = 1; longword f_file_pos = 0; word f_line = 1; word f_column = 1; #define MAX_EXP_SYM 200 char *exp_syms[MAX_EXP_SYM]; char *exp_in_nt[MAX_EXP_SYM]; int nr_exp_syms = 0; char *current_nt = NULL; longword last_space_pos = (longword)-1; scan_pos_t last_space_end_pos; longword last_string_pos = (longword)-1; char *last_string; scan_pos_t last_string_end_pos; longword last_ident_pos = (longword)-1; char *last_ident; bool last_ident_is_keyword; scan_pos_t last_ident_end_pos; void init_scan(FILE *the_file) { _assign(the_file); cur_line = 1; cur_column = 1; f_file_pos = 0; f_line = 1; f_column = 1; cur_line = 1; cur_column = 1; nr_exp_syms = 0; last_space_pos = (longword)-1; last_string_pos = (longword)-1; last_ident_pos = (longword)-1; skip_space(); } void print_last_pos() { int i; printf("%d.%d Expected:\n", f_line, f_column); for (i = 0; i < nr_exp_syms; i++) { bool unique = TRUE; int j; for (j = 0; unique && j < i; j++) if ( !strcmp(exp_syms[i], exp_syms[j]) && exp_in_nt[i] == exp_in_nt[j]) unique = FALSE; if (unique) { printf(" %s", exp_syms[i]); if (exp_in_nt[i]) printf(" in %s", exp_in_nt[i]); printf("\n"); } } } static void expected_string(char *s) { if (file_pos < f_file_pos) return; if (file_pos > f_file_pos) { nr_exp_syms = 0; f_file_pos = file_pos; f_line = cur_line; f_column = cur_column; } if (nr_exp_syms >= MAX_EXP_SYM) return; exp_syms[nr_exp_syms] = s; exp_in_nt[nr_exp_syms] = current_nt; nr_exp_syms++; } void save_pos(scan_pos_t *sp) { sp->file_pos = file_pos; sp->cur_line = cur_line; sp->cur_column = cur_column; } void restore_pos(scan_pos_t *sp) { if (sp->file_pos == file_pos) return; _set_file_pos(sp->file_pos); cur_line = sp->cur_line; cur_column = sp->cur_column; /* _print_state(); */ } void skip_space() { if (file_pos == last_space_pos) { restore_pos(&last_space_end_pos); return; } last_space_pos = file_pos; for(;;) { _DEBUG_P1("now: `%c'\n", *info); if (*info == ' ') { cur_column++; _next(); } else if (*info == '\t') { cur_column = ((cur_column + 8) % 8) * 8; _next(); } else if (info[0] == '\n') { cur_column = 1; cur_line++; _next(); } else if (info[0] == '/' && info[1] == '/') { while (!_eof && info[0] != '\n') { _next(); cur_column++; } } else if (info[0] == '/' && info[1] == '*') { scan_pos_t sp; save_pos(&sp); while (!_eof && info[0] != '*' && info[1] != '/') { _next(); cur_column++; } if (_eof) { printf("Warning: %d.%d comment not terminated\n", sp.cur_line, sp.cur_column); } } else break; } save_pos(&last_space_end_pos); /* _print_state(); */ } bool accept_eof() { if (_eof) { DEBUG_P3("%d.%d: accept_eof() for: `%s'\n", cur_line, start_column, start_info()); return TRUE; } else { DEBUG_F_P3("%d.%d: accept_eof() failed for: `%s'\n", cur_line, cur_column, start_info()); expected_string(""); return FALSE; } } bool accept_i_string(char **string, bool c_string) { scan_pos_t sp; int i; if (_eof || info[0] != '"') { expected_string(""); DEBUG_F_P3("%d.%d: accept_string() failed for: `%s'\n", cur_line, cur_column, start_info()); return FALSE; } save_pos(&sp); i = 0; _next(); for (;;) { if (_eof || *info == '\0' || *info == '\n') { printf("Error: %d.%d incorrectly terminated string\n", sp.cur_line, sp.cur_column); break; } else if (*info == '\\') { _next(); if (0 <= *info && *info <= 7) { char d1 = *info; _next(); if (0 <= *info && *info <= 7) { _next(); if ((d1 == '0' || d1 == '1') && 0 <= *info && *info <= 7) _next(); } } else _next(); i++; } else if (*info == '"') { if (c_string) { _next(); skip_space(); if (_eof || *info != '"') break; _next(); } else break; } else { i++; _next(); } } *string = STR_MALLOC(i); restore_pos(&sp); i = 0; _next(); for (;;) { if (_eof || *info == '\0' || *info == '\n') { break; } else if (*info == '\\') { _next(); cur_column++; if (!_eof && 0 <= *info && *info <= 7) { char d1 = *info, v = *info - '0'; _next(); cur_column++; if (!_eof && 0 <= *info && *info <= 7) { v = v*8 + *info - '0'; _next(); cur_column++; if ( !_eof && (d1 == '0' || d1 == '1') && 0 <= *info && *info <= 7) { v = v*8 + *info - '0'; _next(); cur_column++; } } (*string)[i++] = v; } else if (!_eof) { if (*info == 'n') (*string)[i++] = '\n'; else if (*info == 't') (*string)[i++] = '\t'; else (*string)[i++] = *info; _next(); } } else if (*info == '"') { _next(); cur_column++; if (c_string) { skip_space(); if (_eof || *info != '"') break; cur_column++; _next(); } else break; } else { (*string)[i++] = *info; cur_column++; _next(); } } (*string)[i] = '\0'; skip_space(); DEBUG_P3("%d.%d: accept_string(\"%s\")\n", sp.cur_line, sp.cur_column, *string); return TRUE; } bool accept_string(char **string) { bool result; longword start_pos; if (file_pos == last_string_pos) { *string = last_string; restore_pos(&last_string_end_pos); return TRUE; } start_pos = file_pos; result = accept_i_string(string, FALSE); if (result) { last_string_pos = start_pos; last_string = *string; save_pos(&last_string_end_pos); return TRUE; } return FALSE; } bool accept_cstring(char **string) { return accept_i_string(string, TRUE); } bool accept_ident(char **ident, char keyword_flag) { int i; char s; if (file_pos == last_ident_pos) { if (last_ident_is_keyword) { expected_string(""); DEBUG_F_P3("%d.%d: accept_ident() failed for `%s'\n", cur_line, cur_column, start_info()); return FALSE; } start_column = cur_column; *ident = last_ident; restore_pos(&last_ident_end_pos); } else { longword pos = file_pos; start_line = cur_line; start_column = cur_column; if (_eof || (!isalpha(*info) && *info != '_')) { expected_string(""); DEBUG_F_P3("%d.%d: accept_ident() failed for `%s'\n", cur_line, start_column, start_info()); return FALSE; } i = 1; while (isalpha(info[i]) || isdigit(info[i]) || info[i] == '_') i++; s = info[i]; info[i] = '\0'; *ident = string(info); if (*keyword_state & keyword_flag) { expected_string(""); DEBUG_F_P3("%d.%d: accept_ident() failed for `%s' is keyword\n", cur_line, start_column, info); info[i] = s; return FALSE; } info[i] = s; _advance(i); cur_column += i; last_ident_pos = pos; last_ident = *ident; last_ident_is_keyword = FALSE; skip_space(); save_pos(&last_ident_end_pos); } DEBUG_P3("%d.%d: accept_ident(%s)\n", start_line, cur_column, *ident); return TRUE; } bool accept_char(char *v) { if (_eof || *info != '\'') { expected_string(""); DEBUG_F_P3("%d.%d: accept_char() failed for `%s'\n", cur_line, cur_column, start_info()); return FALSE; } start_column = cur_column; _next(); if (*info == '\\') { _next(); cur_column++; if (0 <= *info && *info <= 7) { char d1 = *info; *v = *info - '0'; _next(); cur_column++; if (0 <= *info && *info <= 7) { *v = (*v)*8 + *info - '0'; _next(); cur_column++; if ((d1 == '0' || d1 == '1') && 0 <= *info && *info <= 7) { *v = (*v)*8 + *info - '0'; _next(); cur_column++; } } } else if (*info == 'n') *v = '\n'; else if (*info == 't') *v = '\t'; else *v = *info; } else *v = *info; _next(); cur_column++; if (*info != '\'') { printf("Error: %d.%d ill terminated character\n", cur_line, start_column); } else { _next(); cur_column++; } skip_space(); DEBUG_P3("%d.%d: accept_char(%c)\n", cur_line, start_column, *v); return TRUE; } bool accept_int(longint *v) { int i = 0; if (_eof || ( !isdigit(*info) && ( (*info != '+' && *info != '-') || !isdigit(info[1])))) { expected_string(""); DEBUG_F_P3("%d.%d: accept_int() failed for `%s'\n", cur_line, cur_column, start_info()); return FALSE; } start_column = cur_column; if (info[i] == '-' || info[i] == '+') i++; if (info[i] == '0' && info[i+1] == 'x') { i += 2; while ( isdigit(info[i]) || ('a' <= info[i] && info[i] <= 'f') || ('A' <= info[i] && info[i] <= 'F')) i++; } else { while (isdigit(info[i])) i++; if (info[i] == '.') { expected_string(""); DEBUG_F_P3("%d.%d: accept_int() found double `%s'\n", cur_line, start_column, start_info()); return FALSE; } if (info[i] == 'L') i++; } sscanf(info, "%ld", v); _advance(i); cur_column += i; skip_space(); DEBUG_P3("%d.%d: accept_int(%ld)\n", cur_line, start_column, *v); return TRUE; } bool accept_double(double *v) { int i = 0; if (_eof || ( !isdigit(*info) && *info != '.' && ( (*info != '+' && *info != '-') || (!isdigit(info[1]) && *info != '.')))) { expected_string(""); DEBUG_F_P3("%d.%d: accept_double() failed for `%s'\n", cur_line, cur_column, start_info()); return FALSE; } start_column = cur_column; if (*info == '-' || *info == '+') i++; while (isdigit(*info)) i++; if (*info != '.') { expected_string(""); DEBUG_P3("%d.%d: accept_double() found int `%s'\n", cur_line, start_column, start_info()); return FALSE; } i++; while (isdigit(*info)) i++; sscanf(info, "%lf", v); _advance(i); cur_column += i; skip_space(); DEBUG_P3("%d.%d: accept_double(%f)\n", cur_line, start_column, *v); return TRUE; } bool accept_symbol(char *sym) { char *v, *s; if (file_pos == last_ident_pos) { if (sym == last_ident) { DEBUG_F_P3("%d.%d: accept_sym(%s)", cur_line, cur_column, sym); restore_pos(&last_ident_end_pos); return TRUE; } else if (!last_ident_is_keyword) { expected_string(sym); DEBUG_F_P4("%d.%d: accept_sym(%s) failed for `%s'\n", cur_line, cur_column, sym, start_info()); return FALSE; } } for (v = info, s = sym; *v == *s; v++, s++); if (*s != '\0') { expected_string(sym); DEBUG_F_P4("%d.%d: accept_sym(%s) failed for `%s'\n", cur_line, cur_column, sym, start_info()); return FALSE; } DEBUG_P3("%d.%d: accept_sym(%s)\n", cur_line, cur_column, sym); last_ident_pos = file_pos; _advance(v - info); cur_column += v - info; skip_space(); last_ident = sym; last_ident_is_keyword = TRUE; save_pos(&last_ident_end_pos); return TRUE; } bool accept_symbol_single_char(char *sym) { if (*info != *sym) { expected_string(sym); DEBUG_F_P4("%d.%d: accept_sym(%s) failed for `%s'\n", cur_line, cur_column, sym, start_info()); return FALSE; } DEBUG_P3("%d.%d: accept_sym(%s)\n", cur_line, cur_column, sym); _next(); cur_column++; skip_space(); return TRUE; } bool accept_keyword(char *sym) { int i; char *v, *s; for (v = info, s = sym; *v == *s; v++, s++); if (*s != '\0' || isalpha(*v) || isdigit(*v) || *v == '_') { expected_string(sym); DEBUG_F_P4("%d.%d: accept_keyword(%s) failed `%s'\n", cur_line, cur_column, sym, start_info()); return FALSE; } DEBUG_P3("%d.%d: accept_keyword(%s)\n", cur_line, cur_column, sym); i = v - info; _advance(i); cur_column += i; skip_space(); return TRUE; } bool accept_xml_string(char **string) { int i; char s; if (file_pos == last_string_pos) { start_column = cur_column; *string = last_string; restore_pos(&last_string_end_pos); } else { start_line = cur_line; start_column = cur_column; if (_eof || *info == '<') { expected_string(""); DEBUG_F_P3("%d.%d: accept_xml_string() failed for `%s'\n", cur_line, start_column, start_info()); return FALSE; } i = 0; for (i = 0; !_eof && info[i] != '<'; i++) { if (info[i] == '\n') { cur_line++; cur_column = 1; } } s = info[i]; info[i] = '\0'; *string = STR_MALLOC(i); strcpy(*string, info); info[i] = s; _advance(i); last_ident_pos = file_pos; last_ident = *string; last_ident_is_keyword = FALSE; skip_space(); save_pos(&last_string_end_pos); } DEBUG_P3("%d.%d: accept_xml_string(%s)\n", start_line, cur_column, *string); return TRUE; } /*** Parser ***/ typedef struct val_T { struct val_T *prev; tree_p last; } val_t, *val_p; typedef struct or_rule_T { struct or_rule_T *next; struct rule_T *rule; char *tree_name; } or_rule_t, *or_rule_p; typedef struct non_terminal_t { struct non_terminal_t *next; char *name; or_rule_p first; or_rule_p recursive; } non_terminal_t, *non_terminal_p; typedef struct rule_T { struct rule_T *next; bool optional; bool sequential; char *chain_symbol; int kind; union { non_terminal_p non_terminal; or_rule_p or_rule; char *str_value; } info; long last_fail_pos; } rule_t, *rule_p; #define RK_NT 0 #define RK_LIT 1 #define RK_OR_RULE 2 #define RK_T_EOF 10 #define RK_T_STRING 11 #define RK_T_CSTRING 12 #define RK_T_INT 13 #define RK_T_DOUBLE 14 #define RK_T_CHAR 15 #define RK_T_IDENT 16 non_terminal_p find_nt(char *name); rule_p make_rule(list_p rules); or_rule_p make_or_rule(list_p or); void make_all_nt(tree_p root); bool parse_nt(non_terminal_p non_term, tree_p *rtree); bool parse_or(or_rule_p or, tree_p *rtree); bool parse_rule(rule_p rule, val_p prev_parts, char *tree_name, tree_p *rtree); bool parse_seq(rule_p rule, char *chain_sym, tree_p seq, val_p prev_parts, char *tree_name, tree_p *rtree) ; static void print_or(FILE *f, or_rule_p or); static void print_rule(FILE *f, rule_p rule); static char *str_root, *str_nt_def, *str_ident, *str_or_rule, *str_rule, *str_opt_elem, *str_list_elem, *str_opt, *str_seq, *str_list, *str_chain, *str_prim_elem, *str_string, *str_cstring, *str_int, *str_double, *str_char, *str_eof; non_terminal_p find_nt(char *name) { static non_terminal_p all_nt = NULL; non_terminal_p *p_nt = &all_nt; while (*p_nt != NULL && (*p_nt)->name != name) p_nt = &((*p_nt)->next); if (*p_nt == NULL) { *p_nt = MALLOC(non_terminal_t); (*p_nt)->name = name; (*p_nt)->first = NULL; (*p_nt)->recursive = NULL; (*p_nt)->next = NULL; } return *p_nt; } rule_p make_rule(list_p rule) { rule_p result; tree_p elem; /* only used as pointer */ if (rule == NULL) return NULL; result = MALLOC(rule_t); result->optional = FALSE; result->sequential = FALSE; result->chain_symbol = NULL; result->last_fail_pos = (long)-1; elem = rule->first; /* Is the first element optional? */ if (equal_tree(elem, str_opt)) { result->optional = TRUE; elem = elements(elem)->first; } /* Is it a sequential, chain or list? */ if (equal_tree(elem, str_seq)) { result->sequential = TRUE; elem = elements(elem)->first; } else if (equal_tree(elem, str_chain)) { result->sequential = TRUE; result->chain_symbol = elements(elem)->next->first->c.str_value; elem = elements(elem)->first; } else if (equal_tree(elem, str_list)) { result->sequential = TRUE; result->chain_symbol = ","; elem = elements(elem)->first; } if (is_a_ident(elem)) { if (equal_ident(elem, str_eof)) result->kind = RK_T_EOF; else if (equal_ident(elem, str_string)) result->kind = RK_T_STRING; else if (equal_ident(elem, str_cstring)) result->kind = RK_T_STRING; else if (equal_ident(elem, str_int)) result->kind = RK_T_INT; else if (equal_ident(elem, str_double)) result->kind = RK_T_DOUBLE; else if (equal_ident(elem, str_char)) result->kind = RK_T_CHAR; else if (equal_ident(elem, str_ident)) result->kind = RK_T_IDENT; else { result->kind = RK_NT; result->info.non_terminal = find_nt(elem->c.str_value); } } else if (is_a_str(elem)) { result->kind = RK_LIT; result->info.str_value = elem->c.str_value; } else if (is_a_list(elem)) { result->kind = RK_OR_RULE; result->info.or_rule = make_or_rule(elements(elem)); } result->next = make_rule(rule->next); return result; } or_rule_p make_or_rule(list_p or) { if (or == NULL) return NULL; { tree_p rule = or->first; /* only used as pointer */ if (is_tree(rule, str_rule)) { list_p elem = elements(rule); or_rule_p result = MALLOC(or_rule_t); if (elem->first == NULL) { result->rule = NULL; result->tree_name = NULL; } else { result->rule = make_rule(elements(elem->first)); result->tree_name = is_a_ident(elem->next->first) ? elem->next->first->c.str_value : NULL; } result->next = make_or_rule(or->next); return result; } } return make_or_rule(or->next); } void make_all_nt(tree_p root) { list_p rules; for (rules = root->c.parts; rules; rules = rules->next) if (is_tree(rules->first, str_nt_def)) { char *nt_name = part(rules->first, 1)->c.str_value; non_terminal_p nt = find_nt(nt_name); or_rule_p *p_first = &nt->first; or_rule_p *p_recursive = &nt->recursive; list_p or = part(rules->first, 2)->c.parts; while (*p_first != NULL) p_first = &(*p_first)->next; while (*p_recursive != NULL) p_recursive = &(*p_recursive)->next; for (; or != NULL; or = or->next) { tree_p rule = or->first; /* only used as pointer */ if (is_tree(rule, str_rule)) { list_p elem = elements(rule); or_rule_p new = MALLOC(or_rule_t); if (elem->first == NULL) { new->rule = NULL; new->tree_name = NULL; new->next = NULL; *p_first = new; p_first = &new->next; } else { list_p parts = elements(elem->first); new->tree_name = is_a_ident(elem->next->first) ? elem->next->first->c.str_value : NULL; new->next = NULL; if (parts == NULL || !is_ident(parts->first, nt_name)) { new->rule = make_rule(parts); *p_first = new; p_first = &new->next; } else { new->rule = make_rule(parts->next); *p_recursive = new; p_recursive = &new->next; } } } } } } enum success_t { s_unknown, s_fail, s_success } ; typedef struct solution_T { struct solution_T *next; char *nt; enum success_t success; tree_p result; scan_pos_t sp; } solution_t; solution_t **solutions = NULL; void init_solutions() { longword i; solutions = MALLOC_N(file_len+1, solution_t*); for (i = 0; i < file_len+1; i++) solutions[i] = NULL; } void free_solutions() { longword i; for (i = 0; i < file_len+1; i++) { solution_t *sol = solutions[i]; while (sol != NULL) { solution_t *next_sol = sol->next; tree_release(&sol->result); free(sol); sol = next_sol; } } free(solutions); solutions = NULL; } solution_t *find_solution(longword filepos, char *nt) { solution_t *sol; if (filepos > file_len) filepos = file_len; for (sol = solutions[filepos]; sol != NULL; sol = sol->next) if ( sol->nt == nt ) return sol; sol = MALLOC(solution_t); sol->next = solutions[filepos]; sol->nt = nt; sol->success = s_unknown; sol->result = NULL; solutions[filepos] = sol; return sol; } static int grammar_level = 2; static char keyword_flag = 1, new_keyword_flag = 2; static int depth = 0; static bool debug_parse = FALSE; static bool debug_nt = FALSE; #define DEBUG_ENTER(X) if (debug_parse) { DEBUG_TAB; printf("Enter: %s", X); depth += 2; } #define DEBUG_ENTER_P1(X,A) if (debug_parse) { DEBUG_TAB; printf("Enter: "); printf(X,A); depth += 2; } #define DEBUG_EXIT(X) if (debug_parse) { depth -=2; DEBUG_TAB; printf("Leave: %s", X); } #define DEBUG_EXIT_P1(X,A) if (debug_parse) { depth -=2; DEBUG_TAB; printf("Leave: "); printf(X,A); } #define DEBUG_TAB if (debug_parse) printf("%*.*s", depth, depth, "") #define DEBUG_NL if (debug_parse) printf("\n") #define DEBUG_PT(X) if (debug_parse) print_tree(stdout, X, TRUE) #define DEBUG_PO(X) if (debug_parse) print_or(stdout, X) #define DEBUG_PR(X) if (debug_parse) print_rule(stdout, X) #define DEBUG_(X) if (debug_parse) printf(X) #define DEBUG_P1(X,A) if (debug_parse) printf(X,A) bool parse_nt( non_terminal_p non_term, tree_p *rtree) { or_rule_p or; char *surr_nt = current_nt; char *nt = non_term->name; solution_t *sol = find_solution(file_pos, nt); DEBUG_ENTER_P1("parse_nt(%s)", nt); DEBUG_NL; if (sol->success == s_success) { DEBUG_EXIT_P1("parse_nt(%s) SUCCESS", nt); DEBUG_NL; tree_assign(rtree, sol->result); restore_pos(&sol->sp); return TRUE; } else if (sol->success == s_fail) { DEBUG_EXIT_P1("parse_nt(%s) FAIL", nt); DEBUG_NL; return FALSE; } current_nt = nt; if (debug_nt) { printf("%*.*s", depth, depth, ""); printf("Enter: %s\n", nt); depth += 2; } for ( or = non_term->first; or != NULL; or = or->next ) if (parse_rule(or->rule, (val_p)NULL, or->tree_name, rtree)) break; if (or != NULL) { for(;;) { val_t val; val.prev = NULL; val.last = NULL; tree_move(&val.last, rtree); for ( or = non_term->recursive; or != NULL; or = or->next ) if (parse_rule(or->rule, &val, or->tree_name, rtree)) { tree_release(&val.last); break; } if (or == NULL) { tree_move(rtree, &val.last); break; } tree_release(&val.last); } DEBUG_EXIT_P1("parse_nt(%s) =", nt); DEBUG_PT(*rtree); DEBUG_NL; if (debug_nt) { depth -= 2; printf("%*.*s", depth, depth, ""); printf("Parsed: %s\n", nt); } current_nt = surr_nt; tree_assign(&sol->result, *rtree); sol->success = s_success; save_pos(&sol->sp); return TRUE; } DEBUG_EXIT_P1("parse_nt(%s) - failed", nt); DEBUG_NL; if (debug_nt) { depth -= 2; printf("%*.*s", depth, depth, ""); printf("Failed: %s\n", nt); } current_nt = surr_nt; sol->success = s_fail; return FALSE; } bool parse_or(or_rule_p or, tree_p *rtree) { DEBUG_ENTER("parse_or: "); DEBUG_PO(or); DEBUG_NL; for ( ; or != NULL; or = or->next ) if (parse_rule(or->rule, (val_p)NULL, or->tree_name, rtree)) { DEBUG_EXIT("parse_or = "); DEBUG_PT(*rtree); DEBUG_NL; return TRUE; } DEBUG_EXIT("parse_or: failed"); DEBUG_NL; return FALSE; } bool parse_rule(rule_p rule, val_p prev_parts, char *tree_name, tree_p *rtree) { bool optional, sequential; char *chain_sym; longword *last_fail_pos; scan_pos_t sp; bool try = FALSE; val_t val; DEBUG_ENTER("parse_rule: "); DEBUG_PR(rule); DEBUG_NL; /* At the end of the rule: */ if (rule == NULL) { if (tree_name != NULL) { if ( prev_parts != NULL && prev_parts->prev == NULL && is_a_list(prev_parts->last) && 0) { tree_assign(rtree, prev_parts->last); (*rtree)->type = tree_name; } else { /* make tree of previous elements: */ *rtree = make_tree(tree_name); while (prev_parts) { insert_element(*rtree, prev_parts->last); prev_parts = prev_parts->prev; } } } else { /* group previous elements into a list, if more than one: */ if (prev_parts == NULL) *rtree = NULL; else if (prev_parts->prev == NULL) tree_assign(rtree, prev_parts->last); else { *rtree = make_list(); while (prev_parts) { insert_element(*rtree, prev_parts->last); prev_parts = prev_parts->prev; } } } DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; return TRUE; } optional = rule->optional, sequential = rule->sequential; chain_sym = rule->chain_symbol; /* Did we fail the last time at this position? */ if (rule->last_fail_pos == file_pos) { DEBUG_EXIT("parse_rule - BREAK "); DEBUG_NL; return FALSE; } last_fail_pos = &rule->last_fail_pos; save_pos(&sp); /* Try to accept first symbol */ { tree_p t = NULL; switch( rule->kind ) { case RK_T_EOF: try = accept_eof(); if (try) t = make_str_atom(string("EOF")); break; case RK_T_STRING: { char *v; try = accept_string(&v); if (try) { if (grammar_level > 1) { t = make_str_atom(string(v)); *keyword_state |= new_keyword_flag; } else t = make_str_atom(v); } break; } case RK_T_CSTRING: { char *v; try = accept_cstring(&v); if (try) t = make_str_atom(v); break; } case RK_T_INT: { longint v; try = accept_int(&v); if (try) t = make_int_atom(v); break; } case RK_T_DOUBLE: { double v; try = accept_double(&v); if (try) t = make_double_atom(v); break; } case RK_T_CHAR: { char v; try = accept_char(&v); if (try) t = make_char_atom(v); break; } case RK_T_IDENT: { char *v; try = accept_ident(&v, keyword_flag); if (try) t = make_ident(v); break; } case RK_NT: try = parse_nt(rule->info.non_terminal, &t); break; case RK_LIT: { char *ss = rule->info.str_value; if (isalpha(*ss) ? accept_keyword(ss) : accept_symbol(ss)) { if (sequential) { tree_p seq = make_list(); if (parse_seq(rule, chain_sym, seq, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&t); tree_release(&seq); return TRUE; } tree_release(&seq); } else if (parse_rule(rule->next, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&t); return TRUE; } } break; } case RK_OR_RULE: try = parse_or(rule->info.or_rule, &t); break; default: try = FALSE; } if (try) { /* We succeded in parsing the first element */ if (sequential) { tree_p seq = make_list(); add_element(seq, t); if (parse_seq(rule, chain_sym, seq, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&t); tree_release(&seq); return TRUE; } tree_release(&seq); } else { val.last = NULL; tree_assign(&val.last, t); val.prev = prev_parts; prev_parts = &val; if (parse_rule(rule->next, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&val.last); tree_release(&t); return TRUE; } tree_release(&val.last); } } tree_release(&t); } if (optional) { restore_pos(&sp); /* First element was optional: */ val.last = NULL; val.prev = prev_parts; prev_parts = &val; if (parse_rule(rule->next, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_rule = "); DEBUG_PT(*rtree); DEBUG_NL; return TRUE; } } restore_pos(&sp); *last_fail_pos = file_pos; DEBUG_EXIT_P1("parse_rule - failed at %ld", file_pos); DEBUG_NL; return FALSE; } bool parse_seq(rule_p rule, char *chain_sym, tree_p seq, val_p prev_parts, char *tree_name, tree_p *rtree) { scan_pos_t sp; bool try = FALSE; val_t val; DEBUG_ENTER("parse_seq: "); DEBUG_PR(rule); DEBUG_NL; save_pos(&sp); /* Try to accept first symbol */ if (chain_sym == NULL || accept_symbol(chain_sym)) { tree_p t = NULL; switch( rule->kind ) { case RK_T_EOF: try = accept_eof(); if (try) t = make_str_atom(string("EOF")); break; case RK_T_STRING: { char *v; try = accept_string(&v); if (try) { if (grammar_level > 1) { t = make_str_atom(string(v)); *keyword_state |= new_keyword_flag; } else t = make_str_atom(v); } break; } case RK_T_CSTRING: { char *v; try = accept_cstring(&v); if (try) t = make_str_atom(v); break; } case RK_T_INT: { longint v; try = accept_int(&v); if (try) t = make_int_atom(v); break; } case RK_T_DOUBLE: { double v; try = accept_double(&v); if (try) t = make_double_atom(v); break; } case RK_T_CHAR: { char v; try = accept_char(&v); if (try) t = make_char_atom(v); break; } case RK_T_IDENT: { char *v; try = accept_ident(&v, keyword_flag); if (try) t = make_ident(v); break; } case RK_NT: try = parse_nt(rule->info.non_terminal, &t); break; case RK_LIT: { char *ss = rule->info.str_value; try = isalpha(*ss) ? accept_keyword(ss) : accept_symbol(ss); break; } case RK_OR_RULE: try = parse_or(rule->info.or_rule, &t); break; default: try = FALSE; } if (try) { /* We succeded in parsing the first element */ add_element(seq, t); if (parse_seq(rule, chain_sym, seq, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_seq = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&t); return TRUE; } drop_last_element(seq); } tree_release(&t); } restore_pos(&sp); val.last = NULL; tree_assign(&val.last, seq); val.prev = prev_parts; prev_parts = &val; if (parse_rule(rule->next, prev_parts, tree_name, rtree)) { DEBUG_EXIT("parse_seq = "); DEBUG_PT(*rtree); DEBUG_NL; tree_release(&val.last); return TRUE; } restore_pos(&sp); DEBUG_EXIT_P1("parse_seq - failed at %ld", file_pos); DEBUG_NL; tree_release(&val.last); return FALSE; } void print_or(FILE *f, or_rule_p or) { bool first = TRUE; for (; or; or = or->next) { if (!first) fprintf(f, "|"); first = FALSE; print_rule(f, or->rule); if (or->tree_name != NULL) fprintf(f, "[%s]", or->tree_name); } } void print_rule(FILE *f, rule_p rule) { bool optional, sequential; char *chain_sym; if (rule == NULL) return; optional = rule->optional, sequential = rule->sequential; chain_sym = rule->chain_symbol; switch(rule->kind) { case RK_T_EOF: fprintf(f, "eof "); break; case RK_T_STRING: fprintf(f, "string "); break; case RK_T_CSTRING: fprintf(f, "cstring "); break; case RK_T_INT: fprintf(f, "int "); break; case RK_T_DOUBLE: fprintf(f, "double "); break; case RK_T_CHAR: fprintf(f, "char "); break; case RK_T_IDENT: fprintf(f, "ident "); break; case RK_NT: fprintf(f, "%s ", rule->info.non_terminal->name); break; case RK_LIT: fprintf(f, "\"%s\" ", rule->info.str_value); break; case RK_OR_RULE: fprintf(f, "("); print_or(f, rule->info.or_rule); fprintf(f, ")"); break; } if (sequential) { if (chain_sym == NULL) fprintf(f, "SEQ "); else if (strcmp( chain_sym, ",")) fprintf(f, "LIST "); else fprintf(f, "CHAIN %s ", chain_sym); } if (optional) { fprintf(f, "OPT "); } print_rule(f, rule->next); } void make_initial_grammar(tree_p *root) { tree_p tt[100]; int v = 0; str_root = string("root"); str_nt_def = string("nt_def"); str_ident = string("ident"); str_or_rule = string("or_rule"); str_rule = string("rule"); str_opt_elem = string("opt_elem"); str_list_elem = string("list_elem"); str_opt = string("opt"); str_seq = string("seq"); str_list = string("List"); str_chain = string("chain"); str_prim_elem = string("prim_elem"); str_string = string("string"); str_cstring = string("cstring"); str_int = string("int"); str_double = string("double"); str_char = string("char"); str_eof = string("eof"); #define ID(s) tt[v]=make_ident(string(s)); #define STR(s) tt[v]=make_str_atom(string(s)); #define INT(i) tt[v]=make_int_atom(i); #define TREE(n) tt[v]=make_tree(string(n)); v++; #define LIST tt[v]=make_list(); v++; #define NONE tt[v]=NULL; #define SEP add_element(tt[v-1],tt[v]); tree_release(&tt[v]); #define CLOSE add_element(tt[v-1],tt[v]); tree_release(&tt[v]); v--; #define NT_DEF(I,E) TREE(str_nt_def) ID(I) SEP LIST E CLOSE CLOSE SEP #define OR_SEP SEP #define RULE(E,N) TREE(str_rule) LIST E CLOSE SEP N CLOSE #define OPT(E) TREE(str_opt) E CLOSE #define SEQ(E) TREE(str_seq) E CLOSE #define LIST_(E) TREE(str_list) E CLOSE #define CHAIN(E,S) TREE(str_chain) E SEP STR(S) CLOSE #define OR_OPEN LIST #define OR_CLOSE CLOSE LIST NT_DEF(str_root, RULE( SEQ(ID(str_nt_def)) SEP ID(str_eof) , NONE) ) NT_DEF(str_nt_def, RULE( ID(str_ident) SEP STR(":") SEP ID(str_or_rule) SEP STR(".") , ID(str_nt_def))) NT_DEF(str_or_rule, RULE( CHAIN(ID(str_rule), "|") , NONE) ) NT_DEF(str_rule, RULE( OPT( SEQ(ID(str_opt_elem)) ) SEP OPT( OR_OPEN RULE( STR("[") SEP ID(str_ident) SEP STR("]") , NONE) OR_CLOSE) , ID(str_rule)) ) NT_DEF(str_opt_elem, RULE( ID(str_list_elem) SEP STR("OPT") , ID(str_opt) ) OR_SEP RULE( ID(str_list_elem) , NONE ) ) NT_DEF(str_list_elem, RULE( ID(str_prim_elem) SEP STR("SEQ") , ID(str_seq)) OR_SEP RULE( ID(str_prim_elem) SEP STR("LIST") , ID(str_list)) OR_SEP RULE( ID(str_prim_elem) SEP STR("CHAIN") SEP ID(str_string) , ID(str_chain)) OR_SEP RULE( ID(str_prim_elem) , NONE) ) NT_DEF(str_prim_elem, RULE( ID(str_string) , NONE ) OR_SEP RULE( ID(str_ident) , NONE ) OR_SEP RULE( STR("(") SEP ID(str_or_rule) SEP STR(")") , NONE ) ) *root = tt[0]; } int main(int argc, char *argv[]) { tree_p tree = NULL; int i; if (argc == 1) { printf("Usage: %s \n" "\n" " options\n" " -p print parse tree\n" " -o ouput tree to C file\n" " +ds debug scanning (full)\n" " +dss debug scanning (normal)\n" " -ds no debug scanning\n" " +dp debug parsing\n" " -dp no debug parsing\n" " +dn debug non-terminals\n" " -dn no debug non-terminals\n", argv[0]); return 0; } fprintf(stderr, "Iparser, Version: %s\n", VERSION); make_initial_grammar(&tree); for (i = 1; i < argc; i++) { char *arg = argv[i]; printf("Processing: %s\n", arg); if (!strcmp(arg, "+g")) grammar_level++; if (!strcmp(arg, "-p")) { printf("tree:\n"); print_tree(stdout, tree, FALSE); printf("\n--------------\n"); } else if (!strcmp(arg, "-pc")) { printf("tree:\n"); print_tree(stdout, tree, TRUE); printf("\n--------------\n"); } else if (!strcmp(arg, "-o") && i + 1 < argc) { char *file_name = argv[++i]; FILE *fout = !strcmp(file_name, "-") ? stdout : fopen(file_name, "w"); if (fout != NULL) { char *dot = strstr(file_name, "."); if (dot) *dot = '\0'; print_tree_to_c(fout, tree, file_name); fclose(fout); } else { printf("Cannot open: %s\n", file_name); return 0; } } else if (!strcmp(arg, "+ds")) debug_scan = 2; else if (!strcmp(arg, "+dss")) debug_scan = 1; else if (!strcmp(arg, "-ds")) debug_scan = 0; else if (!strcmp(arg, "+dp")) debug_parse = TRUE; else if (!strcmp(arg, "-dp")) debug_parse = FALSE; else if (!strcmp(arg, "+dn")) debug_nt = TRUE; else if (!strcmp(arg, "-dn")) debug_nt = FALSE; else { FILE *fin = fopen(arg, "r"); if (fin != NULL) { tree_p new_tree = NULL; init_scan(fin); make_all_nt(tree); tree_release(&tree); init_solutions(); if (!parse_nt(find_nt(str_root), &new_tree)) { print_last_pos(); return 0; } tree_assign(&tree, part(new_tree, 1)); tree_release(&new_tree); free_solutions(); keyword_flag *= 2; new_keyword_flag *= 2; grammar_level--; fclose(fin); } else { printf("Cannot open: %s\n", arg); return 0; } } } tree_release(&tree); if (alloced_trees != 0) printf("Error: alloced trees = %ld\n", alloced_trees); return 0; }