diff options
-rw-r--r-- | .gitignore | 6 | ||||
-rw-r--r-- | CHECKLIST.txt | 55 | ||||
-rw-r--r-- | SemanticCheckList.txt | 37 | ||||
-rw-r--r-- | check/array.p | 32 | ||||
-rw-r--r-- | check/for.p | 11 | ||||
-rw-r--r-- | check/main.p | 5 | ||||
-rw-r--r-- | main.c | 65 | ||||
-rw-r--r-- | node.c | 6 | ||||
-rw-r--r-- | pc.h | 3 | ||||
-rw-r--r-- | pc.l | 12 | ||||
-rw-r--r-- | pc.y | 42 | ||||
-rw-r--r-- | scope.c | 13 | ||||
-rw-r--r-- | scope.h | 4 | ||||
-rw-r--r-- | sem_check.c | 130 | ||||
-rw-r--r-- | sem_check.h | 5 | ||||
-rw-r--r-- | tree.c | 128 | ||||
-rw-r--r-- | tree.h | 4 |
17 files changed, 486 insertions, 72 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..21840e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*~ +*.bak +*.o +y.* +mypc +lex.yy.c diff --git a/CHECKLIST.txt b/CHECKLIST.txt new file mode 100644 index 0000000..ce22f78 --- /dev/null +++ b/CHECKLIST.txt @@ -0,0 +1,55 @@ +Project: DRAGON +=============== + +- Arrange an online demo of your compiler during Finals week (15min). + +- Send a self-contained compressed tar source of your compiler by + email. Your compiler must run on the ITL machines. + +- Submit a hardcopy of your compiler documentation: design document, + user manual, testing report, status report (limitations, caveats, or + bugs), and a "dragon" haiku. Indicate clearly in your report an + extra feature that is unique to your compiler. + +CHECK LIST +---------- +[X] (1.0) Lexical Analysis +[X] a. Line numbering +[X] b. Two styles of comments +[X] c. (optional) Scientific notation + +[ ] (1.5) Syntax Analysis: grammar adjustments +[X] a. Unlimited nesting of subprograms +[ ] b. Array access on both sides of assignment +[X] c. Allow for statements. + d. (optional) Another loop construct + e. (optional) Multidimensional arrays +[ ] f. (optional) Records and pointers +[ ] +[ ] (2.0) Symbol Table +[ ] a. Memory-leak handler + b. (optional) Statistical analysis of hashpjw +[ ] +[ ] (2.5) Syntax Tree (Intermediate Code Generation) +[ ] a. Visual print +[ ] b. Memory-leak handler + +[ ] (3.0) Semantic Analysis & Type Checking +[ ] a. Check list +[ ] b. Error reporting +[ ] c. (optional) Error recovery + +[ ] (4.0) Code Generation +[ ] a. Input/Output statements +[ ] b. Simple expressions (arithmetic and relational): gencode +[ ] c. Statements (assignment, conditional, loop) +[ ] d. Nonlocal names: base frame pointer (static scope parent) +[ ] e. Recursive routines (example: GCD program) +[ ] f. Complex expressions (register spilling) +[ ] g. (optional) Arrays (L-value, R-value, parameters, nonlocal) +[ ] h. (optional) Floating-point support + +[ ] Extra Trails (under construction) +[ ] - Lambda or Objects. +[ ] - Code generator for IA64 or SPARC (RISC architecture). +[ ] - Code optimization. diff --git a/SemanticCheckList.txt b/SemanticCheckList.txt new file mode 100644 index 0000000..9bd2c2c --- /dev/null +++ b/SemanticCheckList.txt @@ -0,0 +1,37 @@ +DRAGON Semantic Checklist +========================= + +[ ] 1. Semantic rules for Scoping +[ ] 1.1. Local objects cannot be declared more than once +[ ] 1.2. Local objects hide non-local objects with the same name +[ ] 1.3. Non-local objects should be visible from inner scopes (unless a local object of the same name exists) +[ ] 1.4. Function and procedure names exist in the scope they are defined (and not in their own scopes) +[ ] 1.5. Local objects cease to exist once their scopes cease to exist + +[ ] 2. Semantic rules for Expressions +[ ] 2.1. Expressions return typed-values +[ ] 2.2. Objects must be declared before they used in expressions +[ ] 2.3. Objects of different types cannot appear in the same expression (no type promotions) + +[ ] 3. Semantic rules for Statements +[ ] 3.1. Statements do not return values +[ ] 3.2. The test expression for IF-THEN, IF-THEN-ELSE, WHILE-DO must be Boolean-valued; +[ ] note that the Boolean type must be implicitly defined +[ ] 3.3. The ELSE clause always binds to the closest IF (resolution of the dangling ELSE problem) +[ ] 3.4. The variable type in FOR-DO must match the types of lower bound and upper bound expressions + +[ ] 4. Semantic rules for Arrays +[ ] 4.1. Non-integer valued expressions cannot be used for indexing arrays + +[ ] 5. Semantic rules for Functions +[ ] 5.1. Function calls return values of type Integer or Real +[ ] 5.2. Function must contain a "return" statement within its own body; +[ ] this is of the form: <function_name> := <expression> +[ ] 5.3. Functions must accept exactly the same number of arguments as is +[ ] declared in its header, with the correct sequence of types +[X] 5.4. Functions are not allowed to update the value of nonlocal objects (via assignment statements) + +[ ] 6. Semantic rules for Procedures +[ ] 6.1. Procedure calls do not return values +[ ] 6.2. Procedures must accept exactly the same number of arguments as is +[ ] declared in its header, with the correct sequence of types diff --git a/check/array.p b/check/array.p new file mode 100644 index 0000000..e378655 --- /dev/null +++ b/check/array.p @@ -0,0 +1,32 @@ +program main ( input, output ); + + var a, b: integer; + var x,y,z: real; + var ai :array [1..10] of integer; + procedure boo (n: integer); + var a,c: integer; + begin + a := n + end; + function bar (a: integer) : real; + var test:integer; + begin + test := 2; + a := 2 + end; + procedure foo; + begin + a := 33333; + x := 1e10; + z := 1e-10; + y := 2.5543e-2 + end; +begin +{ TEST } + + a := 1; + x := 3.14; + b := a + 35; + ai[2] := a + (* test *) +end. diff --git a/check/for.p b/check/for.p new file mode 100644 index 0000000..5391c8c --- /dev/null +++ b/check/for.p @@ -0,0 +1,11 @@ +program main ( input, output ); + + var a, b,c: integer; +begin + + a := 1; + (* test *) + for c := 0 to 10 do begin + for a:= 10 downto 0 do b := a - c + end +end. diff --git a/check/main.p b/check/main.p index f05808b..613f8dc 100644 --- a/check/main.p +++ b/check/main.p @@ -26,9 +26,6 @@ begin a := 1; x := 3.14; - b := a + 35; + b := a + 35 (* test *) - for c := 0 to 10 do begin - for a:= 10 downto 0 do b := a - c - end end. @@ -1,3 +1,5 @@ +#include "pc.h" + #include <stdio.h> #include <stdlib.h> #include <assert.h> @@ -5,7 +7,6 @@ #include "node.h" #include "scope.h" #include "y.tab.h" -#include "pc.h" extern char *yytext; extern int line_num; @@ -17,10 +18,10 @@ int t; { if (t == ARRAY) { return "ARRAY"; - } else if (t == INT - ARRAY) { - return "ARRAY of INT"; - } else if (t == INT - ARRAY) { + } else if (t == ARRAY - INT) { return "ARRAY of INT"; + } else if (t == ARRAY - REAL) { + return "ARRAY of REAL"; } else if (t == INT) { return "INT"; } else if (t == REAL) { @@ -129,15 +130,65 @@ int yyerror(msg) char* msg; { fprintf(stderr, "\nError, line %d: %s\n", line_num, msg); -#ifdef DEBUG - fprintf(stderr, "%s\n", yytext); exit(1); -#endif return 0; } int main() { +#ifdef DEBUG_TYPES + printf( + "\nPROG\t\t%d\n" + "VAR\t\t%d\n" + "PROC\t\t%d\n" + "FUNC\t\t%d\n" + "BEG\t\t%d\n" + "END\t\t%d\n" + "ID\t\t%d\n" + "ADDOP\t\t%d\n" + "MULOP\t\t%d\n" + "RELOP\t\t%d\n" + "ASSIGNOP\t\t%d\n" + "ADD\t\t%d\n" + "SUB\t\t%d\n" + "MUL\t\t%d\n" + "DIV\t\t%d\n" + "NOT\t\t%d\n" + "AND\t\t%d\n" + "OR\t\t%d\n" + "EQ\t\t%d\n" + "NE\t\t%d\n" + "LT\t\t%d\n" + "LE\t\t%d\n" + "GT\t\t%d\n" + "GE\t\t%d\n" + "INUM\t\t%d\n" + "RNUM\t\t%d\n" + "INT\t\t%d\n" + "REAL\t\t%d\n" + "BOOL\t\t%d\n" + "ARRAY\t\t%d\n" + "OF\t\t%d\n" + "DOTS\t\t%d\n" + "IF\t\t%d\n" + "ELSE\t\t%d\n" + "THEN\t\t%d\n" + "WHILE\t\t%d\n" + "DO\t\t%d\n" + "FOR\t\t%d\n" + "TO\t\t%d\n" + "DT\t\t%d\n" + "FCALL\t\t%d\n" + "PCALL\t\t%d\n" + "ARRAY_ACCESS\t\t%d\n" + "LIST\t\t%d\n", + + PROG, VAR, PROC, FUNC, BEG, END, ID, ADDOP, MULOP, RELOP, ASSIGNOP, ADD, + SUB, MUL, DIV, NOT, AND, OR, EQ, NE, LT, LE, GT, GE, INUM, RNUM, INT, REAL, + BOOL, ARRAY, OF, DOTS, IF, ELSE, THEN, WHILE, DO, FOR, TO, DT, FCALL, PCALL, + ARRAY_ACCESS, LIST); +#endif + cur_scope = mkscope(); assert(cur_scope); @@ -1,10 +1,10 @@ +#include "node.h" + #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <string.h> -#include "node.h" - /*constructor*/ node* mknode(str) char *str; @@ -15,6 +15,8 @@ char *str; p->name = strdup(str); p->next = NULL; + p->var_type = -1; + return p; } @@ -1,5 +1,8 @@ #ifndef PC_H #define PC_H + +#include "y.tab.h" + char* pretty_type(int); void debug_print(int, union YYSTYPE*); @@ -174,37 +174,37 @@ id [A-Za-z][A-Za-z0-9_]* "+" { yylval.opval = ADD; - debug_print(RELOP, &yylval); + debug_print(ADDOP, &yylval); return ADDOP; } "-" { yylval.opval = SUB; - debug_print(RELOP, &yylval); + debug_print(ADDOP, &yylval); return ADDOP; } "or" { yylval.opval = OR; - debug_print(RELOP, &yylval); + debug_print(ADDOP, &yylval); return ADDOP; } "*" { yylval.opval = MUL; - debug_print(RELOP, &yylval); + debug_print(MULOP, &yylval); return MULOP; } "/" { yylval.opval = DIV; - debug_print(RELOP, &yylval); + debug_print(MULOP, &yylval); return MULOP; } "and" { yylval.opval = AND; - debug_print(RELOP, &yylval); + debug_print(MULOP, &yylval); return MULOP; } @@ -1,6 +1,7 @@ %{ #include <stdlib.h> #include <stddef.h> +#include <assert.h> #include "node.h" #include "scope.h" @@ -83,6 +84,8 @@ extern scope *cur_scope; %type <ival> type %type <ival> standard_type +%type <ival> TD + %% program @@ -92,6 +95,7 @@ program compound_statement '.' { + set_ret_type($9); print_tree($9); free_tree($4); } @@ -159,6 +163,7 @@ sub_prog_declaration sub_prog_declarations compound_statement { + set_ret_type($4); print_tree($4); free_tree($4); pop_scope(&cur_scope); @@ -194,10 +199,12 @@ param_list :id_list ':' type { $$ = $1; + update_type_info($1, $3); } |param_list ';' id_list ':' type { $$ = mktree(LIST, $1, $3); + update_type_info($3, $5); } ; @@ -252,24 +259,49 @@ statement } |FOR var ASSIGNOP expr TD expr DO statement { - /*TODO design tree structure for FOR loops*/ - $$ = NULL; + /* + FOR + / \ + TD STATEMENT + / \ + ASSIGNOP INUM + */ + ptree *tmp; + + tmp = mktree(ASSIGNOP, $2, $4); + tmp = mktree($5, tmp, $6); //$5 is TD + + $$ = mktree(FOR, tmp, $8); + } + | expr + { + $$ = $1; } ; -TD: TO | DT; +TD + :TO + { + $$ = TO; + } + |DT + { + $$ = DT; + } +; var :ID { - $$ = mkid(scope_insert(cur_scope,$1)); + $$ = mkid(scope_safe_search(cur_scope,$1)); } |ID '[' expr ']' { node* tmp; - tmp = scope_insert(cur_scope, $1); + tmp = scope_safe_search(cur_scope, $1); $$ = mktree(ARRAY_ACCESS, mkid(tmp), $3); + $$->attr.nval = $$->l->attr.nval; } ; @@ -1,10 +1,12 @@ +#include "scope.h" + #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <string.h> -#include "node.h" -#include "scope.h" +#include "pc.h" + scope* mkscope() { @@ -138,9 +140,14 @@ scope *s; { int i; node * tmp; + + fprintf(stderr, "\n\nSCOPE\n" + "==========================================================\n"); + for (i = 0; i < HASH_SIZE; i++) { for( tmp=s->table[i]; tmp; tmp = tmp->next) { - fprintf(stderr, "\t%s\n", tmp->name); + fprintf(stderr, "\t%s:%s\n", tmp->name, + pretty_type(tmp->var_type)); } } } @@ -1,6 +1,8 @@ #ifndef SCOPE_H #define SCOPE_H +#include "node.h" + #define HASH_SIZE 211 typedef struct hash { @@ -20,7 +22,7 @@ void push_scope(scope**); node* scope_insert(scope*, char*); node* scope_search_all(scope*, char*); node* scope_search(scope*, char*); -node* scope_safe_search_all(scope*, char*); +node* scope_safe_search(scope*, char*); /*hash function*/ int hashpjw(char*); diff --git a/sem_check.c b/sem_check.c index 637d044..3d42996 100644 --- a/sem_check.c +++ b/sem_check.c @@ -1,12 +1,10 @@ +#include "sem_check.h" + #include <assert.h> #include <stdio.h> -#include "node.h" -#include "scope.h" -#include "tree.h" #include "y.tab.h" #include "pc.h" -#include "sem_check.h" void check_id(s, n) scope *s; @@ -34,3 +32,127 @@ char *n; return tmp; } + +int check_ret_type(t) +ptree *t; +{ + char buf[100]; + int type; + + if (!t) + fprintf(stderr, "TYPE: %d\n", t->type); + + switch (t->type) { + case ID: + if (!(t->attr.nval)) + yyerror("Missing ID\n"); + + return t->attr.nval->var_type; + + case ADDOP : + case MULOP : + if (!(t->r && t->l)) + yyerror("Missing nodes\n"); + + if (t->attr.opval == ADD || t->attr.opval == OR) { + if(t->l->ret_type == BOOL && t->r->ret_type ==BOOL) + return BOOL; + else { + type = t->l->ret_type == BOOL ? + t->r->ret_type : t->l->ret_type; + + snprintf(buf, 100, "Mismached types:" + "Cannot use boolean " + "operator on type %s\n", + pretty_type(type)); + yyerror(buf); + } + } + + if (t->r->ret_type == t->l->ret_type) + return t->r->ret_type; + else { + snprintf(buf, 100, "Mismached types: " + "Type %s " + "cannot be used with type %s\n", + pretty_type(t->r->ret_type), + pretty_type(t->l->ret_type)); + yyerror(buf); + } + break; + case RELOP : + if (!(t->r && t->l)) + yyerror("Missing nodes\n"); + if (t->r->ret_type == t->l->ret_type) + return BOOL; + else + snprintf(buf, 100, "Mismached types: " + "Type %s " + "cannot be compared to type %s\n", + pretty_type(t->r->ret_type), + pretty_type(t->l->ret_type)); + yyerror(buf); + break; + case NOT: + if (t->l && t->l->ret_type == BOOL) + return BOOL; + yyerror("NOT needs bool input\n"); + break; + case INUM: + return INT; + case RNUM: + return REAL; + case ASSIGNOP: + if (!(t->r && t->l)) + yyerror("Incomplete parse tree\n"); + + if (t->l->ret_type == t->r->ret_type) + return 0; + else { + snprintf(buf, 100, "Mismached types: " + "Cannot assign type %s " + "to variable \"%s\" of type %s\n", + pretty_type(t->r->ret_type), + t->l->attr.nval->name, + pretty_type(t->l->attr.nval->var_type)); + yyerror(buf); + } + + + break; + case ARRAY_ACCESS: + if (!(t->r && t->l && t->l->attr.nval)) + yyerror("Incorrect Array Access\n"); + + if (t->r->ret_type != INT) { + snprintf(buf, 100, "Cannot access array" + "with type %s\n", + pretty_type(t->r->ret_type)); + yyerror(buf); + } + + type = t->l->attr.nval -> var_type; + if (type == ARRAY - INT || type == ARRAY - REAL) + return ARRAY - type; + break; + case IF: + case WHILE: + if (!(t->r && t->l)) + yyerror("Incomplete parse tree\n"); + + if (t->l->ret_type != BOOL) + yyerror("If condition must be of type BOOL\n"); + return 0; + case FOR: + /*TODO add for type checking after parsing is correct*/ + break; + default: + return -200; + snprintf(buf, 101, "Unknown tree node: %d...\n", t->type); + yyerror(buf); + } + + return -1; + +} + diff --git a/sem_check.h b/sem_check.h index eef9e96..ce7d7fb 100644 --- a/sem_check.h +++ b/sem_check.h @@ -1,8 +1,13 @@ #ifndef SEMCHECK_H #define SEMCHECK_H +#include "scope.h" +#include "tree.h" + void check_id(scope*, char*); node* check_exists(scope*, char*); +int check_ret_type(ptree*); + #endif @@ -1,12 +1,14 @@ +#include "tree.h" + #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> -#include "node.h" -#include "tree.h" #include "y.tab.h" +#include "scope.h" #include "pc.h" +#include "sem_check.h" /* parse tree funcs */ ptree* mktree(type, l, r) @@ -27,7 +29,7 @@ ptree* mkid(n) node *n; { ptree *p = mktree(ID, NULL, NULL); - p->attr.nval = n; /* memory leak? double strdup*/ + p->attr.nval = n; return p; } @@ -61,22 +63,42 @@ int type; ptree *list; { assert(list); + if (list->type == ID) { + list->attr.nval->var_type = type; + return; + } + while (list->r && list->r->type == ID) { /*Set type of right child through list*/ list->r->attr.nval->var_type = type; - + if (list->l) { if (list->l->type == LIST) { list = list->l; continue; /*Continue down list*/ } else if (list->l->type == ID) - /*Set type of first declared ID (only left node in LIST)*/ + /*Set type of first declared ID + (only left node in LIST)*/ list->l->attr.nval->var_type = type; } return; /*At _end_ of list (did not continue)*/ } } +void set_ret_type(t) +ptree *t; +{ + if (!t) + return; + + + set_ret_type(t->l); + set_ret_type(t->r); + t->ret_type = check_ret_type(t); + + return; +} + /*PRINT FUNCS*/ @@ -100,42 +122,68 @@ int spaces; for (i = 0; i < spaces; i++) fprintf(stderr," "); switch (t->type) { - - case ADDOP: - fprintf(stderr, "[ADDOP]"); - break; - case MULOP: - fprintf(stderr, "[MULOP]"); - break; - case RELOP: - fprintf(stderr, "[RELOP]"); - break; - case NOT: - fprintf(stderr, "[NOT]"); - break; - case ARRAY_ACCESS: - fprintf(stderr, "[ARRAY ACCESS]"); - break; - case LIST: - fprintf(stderr, "[LIST]"); - break; - case ID: - fprintf(stderr, "[ID: %s %d]", t->attr.nval->name, t->attr.nval->var_type); - break; - case INUM: - fprintf(stderr, "[INUM: %d]", t->attr.ival); - break; - case RNUM: - fprintf(stderr, "[RNUM: %f]", t->attr.rval); - break; - case ASSIGNOP: - fprintf(stderr, "[ASSIGN]"); - break; - default: - fprintf(stderr, "\t%d", t->type); - yyerror("Error in tree_print"); + case ADDOP: + fprintf(stderr, "[ADDOP]"); + break; + case MULOP: + fprintf(stderr, "[MULOP]"); + break; + case RELOP: + fprintf(stderr, "[RELOP]"); + break; + case NOT: + fprintf(stderr, "[NOT]"); + break; + case ARRAY_ACCESS: + fprintf(stderr, "[ARRAY ACCESS]"); + break; + case LIST: + fprintf(stderr, "[LIST]"); + break; + case ID: + if (t->r && t->r->attr.nval) + fprintf(stderr, "[ID: %s %s]", + t->r->attr.nval->name, + pretty_type( + t->attr.nval->var_type)); + else + fprintf(stderr, "[ID: %s %s]", + t->attr.nval->name, + pretty_type( + t->attr.nval->var_type)); + break; + case INUM: + fprintf(stderr, "[INUM: %d]", t->attr.ival); + break; + case RNUM: + fprintf(stderr, "[RNUM: %f]", t->attr.rval); + break; + case ASSIGNOP: + fprintf(stderr, "[ASSIGN]"); + break; + case IF: + fprintf(stderr, "[IF]"); + break; + case THEN: + fprintf(stderr, "[THEN]"); + break; + case WHILE: + fprintf(stderr, "[WHILE]"); + break; + case FOR: + fprintf(stderr, "[FOR]"); + break; + case TO: + fprintf(stderr, "[TO]"); + break; + case DT: + fprintf(stderr, "[DOWN-TO]"); + break; + default: + fprintf(stderr, "\t%d", t->type); + yyerror("Error in tree_print"); } - fprintf(stderr,"\n"); + fprintf(stderr," %d\n", t->ret_type); aux_tree_print(t->l, spaces + 2); fprintf(stderr,"\n"); aux_tree_print(t->r, spaces + 2); @@ -1,6 +1,8 @@ #ifndef TREE_H #define TREE_H +#include "node.h" + typedef struct parse_tree { int type; union { @@ -12,6 +14,7 @@ typedef struct parse_tree { MULOP: MUL DIV */ } attr; + int ret_type; struct parse_tree *l, *r; } ptree; @@ -25,6 +28,7 @@ ptree* mkrnum(float); ptree* mkop(int, int, ptree*, ptree*); void update_type_info(ptree*, int); +void set_ret_type(ptree*); void free_tree(ptree*); |