Principles Of Modern Compiler Design(PMCD)

Assignment-1 :-Lexical analyzer for C language using LEX.

%option noyywrap

%{
            #include<stdio.h>
            FILE *fp;
            char fname[20];
            struct symtab
            {
                        char symname[10];
            };
           
            struct symtab s[20];
            int sindex=0;
%}

%%

"float"|"int"|"char"|"string"|"long"|"const"|"for"|"if"|"else"|"while"|"do"|"struct"          {printf("%s\t%s\n" , yytext,"Keyword");}

[a-zA-Z_][a-zA-Z_0-9]*                                             {printf("%s\t%s\n" , yytext,"Identifier");
                                                                         insert(yytext);}
           
[+-]?[0-9]+(([.][0-9]+)?([eE][+-]?[0-9]+)?)?    {printf("%s\t%s\n" , yytext,"Digits");}

["][^"\n]*["]                             {printf("%s\t%s\n",yytext,"String constant");}
           
[ \t\n]                                       {printf("%c\t%s\n" , yytext[0],"White Spaces");}

[,;]                                            {printf("%c\t%s\n" , yytext[0],"Delimiter");}

[=]                                           {printf("%c\t%s\n" , yytext[0],"Assignment operator");}

[-+*/%]                                                {printf("%c\t%s\n" , yytext[0],"Arithmetic operator");}

"<"|">"|"<="|">="|"=="                        {printf("%s \t%s\n", yytext,"Relational operator");}

[(){}]                                       {printf("%c\t%s\n" , yytext[0],"Bracket");}

"["|"]"                                       {printf("%c\t%s\n" , yytext[0],"Sqaure Bracket");}

[ \t]*"#include"[ \t]*"<"[a-zA-Z]+[.][hc]">" {printf("%s\t%s\n" , yytext,"Preprocessor directive");}

[ \t]*"#include"[ \t]*["][a-zA-Z]+[.][hc]["] {printf("%s\t%s\n" , yytext,"Preprocessor directive");}

"//".*                                        {printf("%s\t%s\n" , yytext,"Single line comment");}

"/*"([^*]*|[^/]*)"*/"                 {printf("%s\t%s\n" , yytext,"Multi line comment");}


.                                               {printf("%s\t%s\n" , yytext,"Invalid");}

%%

main()
{
            printf("Enter the filename :");
            scanf("%s",fname);
            fp=fopen(fname,"r");
            yyin=fp;
            yylex();
            int j=0;
            printf("Symbol table is:\n");
            printf("SR.no\t\tSymbol name");
           
            for(j=0;j<sindex;j++)
            {
                       
                        printf( "\n%d\t\t%s",j,s[j].symname);
                        printf("\n");
                       
           
            }
           
            return 0;

}

int insert (char *name)
            {
                        int i=0;
                        for(i=0;i<sindex;i++)
                        {
                                    if(strcmp(s[i].symname,name)==0)
                                    return i;
                        }
                        strcpy(s[sindex].symname,name);
                        sindex++;
                        return sindex;
            }
           
Input file:

#include <stdio.h>

int main()
{
            int a,b,sum;
            a=10,b=15;
            sum=a+b;       
            //My Comment
            /*Multilinecomment*/
           
return 0;
}

/****************OUTPUT****************

pccoe@212A-17:~$ cd Desktop
pccoe@212A-17:~/Desktop$ lex lexan.l
pccoe@212A-17:~/Desktop$ gcc lex.yy.c
pccoe@212A-17:~/Desktop$ ./a.out
Enter the filename :myprogram.txt
#include <stdio.h>      Preprocessor directive

            White Spaces

            White Spaces
int        Keyword
            White Spaces
main    Identifier
(           Bracket
)           Bracket

            White Spaces
{          Bracket

            White Spaces
                        White Spaces
int        Keyword
            White Spaces
a          Identifier
,           Delimiter
b          Identifier
,           Delimiter
sum      Identifier
;           Delimiter

            White Spaces
                        White Spaces
a          Identifier
=          Assignment operator
10        Digits
,           Delimiter
b          Identifier
=          Assignment operator
15        Digits
;           Delimiter

            White Spaces
                        White Spaces
sum      Identifier
=          Assignment operator
a          Identifier
+          Arithmetic operator
b          Identifier
;           Delimiter
                        White Spaces

            White Spaces
                        White Spaces
//My Comment            Single line comment

            White Spaces
                        White Spaces
/*Multilinecomment*/ Multi line comment

            White Spaces
                        White Spaces

            White Spaces
return   Identifier
            White Spaces
0          Digits
;           Delimiter

            White Spaces
}          Bracket

            White Spaces
Symbol table is:
SR.no              Symbol name
0                      main

1                      a

2                      b

3                      sum

4                      return
pccoe@212A-17:~/Desktop$

Assignment-2: Implementing recursive descent parser for sample language(RDP).

/*
            Parser for Production:
            E -> TE'
            E'-> +TE'
            T -> FT'
            T'-> *FT'
            F -> [a-z]*[A-Z]*[0-9]*(,)
*/

#include<stdio.h>
#include<string.h>
#include<ctype.h>

// Global Variables
char inputString[20];
int error = 0;
int i = 0;

void E();
void E_Dash();
void T();
void T_Dash();
void F();

int main(){

            printf("Enter String : ");
            gets(inputString);

            E();

            if( error == 0 && strlen(inputString) == i ){

                        printf("String Accepted\n");

            }else{

                        printf("String Not Accepted\n");
            }

}

void E(){

            T();
            E_Dash();
}

void E_Dash(){

            if( inputString[i] == '+'){
                        i++;
                        T();
                        E_Dash();
            }
}

void T(){

            F();
            T_Dash();
}

void T_Dash(){

            if( inputString[i] == '*' ){
                        i++;
                        F();
                        T_Dash();
            }
}

void F(){

            if( isalpha(inputString[i]) ){

                        i++;

            }else if( isdigit(inputString[i]) ){

                        i++;

            }else if( inputString[i] == '(' ){

                        i++;

                        E();

                        if( inputString[i] == ')'){

                                    i++;

                        }else{

                                    error = 1;
                        }

            }else{

                        error = 1;
            }
}

/****************OUTPUT****************
administrator@210:~$ gcc Parser.c
administrator@210:~$ ./a.out
Enter String : (a+b)*(e+d) 
String Accepted
administrator@210:~$ ./a.out
Enter String : (a+b)*(c+d)
String Accepted
administrator@210:~$ ./a.out
Enter String : (a+b
String Not Accepted
administrator@210:~$
*/

Assignment-3: Implementation of calculator program using LEX and YACC.(Calculator)
Parser.y
%{
            #include<stdio.h>
            #include<string.h>  
            #include<math.h>
           

  struct symtab
  {
            char symname[20];
            double sval;
  };
  struct symtab s[20];
  int sindex=0;
  int i;
%}

%union
{
char name[20];
double val;
}

%token <name>ID
%token <val>NUM
%token            SIN
%token            COS
%token            TAN
%token            SQRT
%token            LOG
%left '+' '-'
%left '*' '/'
%nonassoc UM
%type <val>E

%%
slist      :slist S
            |S
            ;

S          :ID'='E';'                      {i=insert($1);s[i].sval=$3;}                            
            |E';'                              {printf("%f",$1);}
            ;

E          :E'+'E                          {$$=$1+$3;}
            |E'-'E                            {$$=$1-$3;}
            |E'*'E                           {$$=$1*$3;}
            |E'/'E                            {$$=$1/$3;}
            |'-'E      %prec UM       {$$=-$2;}
            |SIN'('E')'                     {$$=sin($3*3.141/180.0);}
            |COS'('E')'                   {$$=cos($3*3.141/180.0);}
            |TAN'('E')'                   {$$=tan($3*3.141/180.0);}
            |SQRT'('E')'                 {$$=sqrt($3);}
            |LOG'('E')'                   {$$=log($3);}
            |ID                               {i=insert($1);$$=s[i].sval;}
            |NUM                          {$$=$1;}

%%

main()
{

yyparse();
}
int insert(char *name)
{
            int i=0;
            for(i=0;i<sindex;i++)
            {
                        if(strcmp(s[i].symname,name)==0)
                                    return i;
            }
            strcpy(s[sindex].symname,name);
            sindex++;
            return sindex;
}
int yyerror(char *s)
{
            printf("%sInvalid input\n",s);
}         


Tokens.l




%{

  #include "y.tab.h"

%}

%%
"SIN"                          {return SIN;}
"COS"                         {return COS;}
"TAN"                         {return TAN;}
"SQRT"                                   {return SQRT;}
"LOG"                         {return LOG;}
[a-zA-Z_][A-Za-z_0-9]*         {strcpy(yylval.name,yytext); return ID;}
[0-9]+(\.[0-9]+)?          {yylval.val=atof(yytext); return NUM;}

[-+*/=();]                     {return yytext[0];}
[ \t\n]                           {;}
"$"                               {return 0;}


%%


int yywrap()
{
      return 1;
}

/****************OUTPUT****************


pccoe@212A-17:~$
pccoe@212A-17:~/ $ yacc -d cal.y
pccoe@212A-17:~/ $ lex cal.l
pccoe@212A-17:~/ $ gcc lex.yy.c y.tab.c -lm
pccoe@212A-17:~/ $ ./a.out
6+10;
16.000000
5-9;
-4.000000
4*2;
8.000000
15/3;
5.000000
-2;
-2.000000
SIN(90);
1.000000
COS(45);
0.707212
TAN(0);
0.000000
LOG(10);
2.302585
SQRT(64);
8.000000

Assignment-4: Intermediate code generation for sample language using LEX and YACC.

Lex Program

%{
            #include"y.tab.h"
            #include<string.h>

%}

%%
"main"                                     {return MAIN;}
[0-9]+                                      {strcpy(yylval.s,yytext);return NUM;}
[a-zA-Z_][a-zA-Z_0-9]*         {strcpy(yylval.s,yytext);return ID;}
[-+*/()={};]                             {return yytext[0];}
[\n \t]                                       {;}

%%

int yyerror(char *s)
{
            printf("%s",s);
}
int yywrap()
{
            return -1;
}
-------------------------------------------------------------------------
Yacc Program

%{
            #include<stdio.h>
            #include<string.h>
           
            struct{
            char oper[10];
            char arg1[10];
            char arg2[10];
            char res[10];
            }QTAB[20];
            extern FILE *yyin;
            int qindex=0;
            int tindex=1;
%}

%union
{
            char s[10];
}
%token MAIN
%token<s> ID
%token<s> NUM
%type<s> E
%left '+' '-'
%left  '*' '/'
%nonassoc UMINUS

%%
PROG : MAIN '(' ')' BLOCK
            ;
BLOCK          : '{' stmtlist '}'
            ;
stmtlist: stmtlist S
            | S
            ;
S          : assignstmt ';'
            ;
assignstmt: ID '=' E                 {strcpy(QTAB[qindex].oper,"=");strcpy(QTAB[qindex].arg1,$3);strcpy(QTAB[qindex].res,$1);qindex++;}
              ;
E          : E '+' E                       {addquad("+",$1,$3,$$);}
            | E '*' E                        {addquad("*",$1,$3,$$);}
            | E '-' E                         {addquad("-",$1,$3,$$);}
            | E '/' E                         {addquad("/",$1,$3,$$);}
            | '-' E                            {addquad("UMINUS",$2,"",$$);}
            | ID                              {strcpy($$,$1);}
            | NUM                         {strcpy($$,$1);}
            ;
%%

void addquad(char *s1,char *s2, char *s3, char *s4)
{
            strcpy(QTAB[qindex].oper,s1);
            strcpy(QTAB[qindex].arg1,s2);
            strcpy(QTAB[qindex].arg2,s3);
            sprintf(s4,"t%d",tindex);
            strcpy(QTAB[qindex].res,s4);
            tindex++;
            qindex++;
}

void display()
{
            int i=0;
            printf("OP\tARG1\tARG2\tRES\n");
            for(i=0;i<qindex;i++)
            {
                        printf("\n%s\t",QTAB[i].oper);
                        printf("%s\t",QTAB[i].arg1);
                        printf("%s\t",QTAB[i].arg2);
                        printf("%s\t",QTAB[i].res);
                        printf("\n");
            }
}

main()
{
            yyin=fopen("/home/admin1/k","rw");
            if(yyin == NULL)
            {
                        printf("error");
                        return 0;
            }
            yyparse();
            display();
}
-------------------------------------------------------------------------
Input file
main()
{c=a+b;
a=a*c;}

/****************OUTPUT****************
admin1@ubuntu:~$ yacc -d pmcd4.y
admin1@ubuntu:~$ lex pmcd_4.l
admin1@ubuntu:~$ gcc lex.yy.c y.tab.c
admin1@ubuntu:~$ ./a.out
OP       ARG1 ARG2 RES

+          a          b          t1        

=          t1                     c         

*          a          c          t2        

=          t2                     a         
*/ 

Assignment-5:-  Generating abstract syntax tree using LEX and YACC.

YACC program

%{
            #include<stdio.h>
            #include<string.h>
            #include<stdlib.h>
            struct node
            {
                        char name[10];
                        struct node *left;
                        struct node *right;
            };
            struct node *root,*temp;
            extern FILE *yyin;
%}
%union{
char str[10];
struct node *p;
}
%token <str>ID
%type<p>E
%left '+''-'
%left '*''/'
%nonassoc UMINUS
%%
S          :  ID '=' E                    {temp = createnode($1,NULL,NULL); root=createnode("=",temp,$3); }
            |  E                               {root=$1;}
            ;
E          :  E '+' E                      {$$ = createnode("+",$1,$3);}
            |  '-' E                           {$$ = createnode("UMINUS",$2,NULL);}
            |  '(' E ')'                       {$$ = $2;}
            |  ID                             {$$ = createnode($1,NULL,NULL);}
            ;
%%
struct node *createnode(char *s,struct node *l,struct node *r)
{
            struct node *new;
            new = (struct node*)malloc(sizeof(struct node));
            if(new!=NULL)
            {
                        strcpy(new->name,s);
                        new->left=l;
                        new->right=r;                         
            }
            else
            {
                        printf("\nNode not created !!");
                        exit(0);
            }
            return new;
}
 void inorder(struct node *temp1)
{
            if(temp1!= NULL)
            {
                        inorder(temp1->left);
                        printf("\n %s\n ", temp1->name);
                        inorder(temp1->right);
            }
}

main()
{
            FILE *fp;
            fp=fopen("input.txt","r");
            if(fp == NULL)
            {
                        printf("error");
            }
            yyin=fp;
            yyparse();
            inorder(root);
}
-------------------------------------------------------------------------
LEX Program

%{
            #include"y.tab.h"
            #include<string.h>

%}

%%
[0-9]+                                                  {;}
[a-zA-Z_][a-zA-Z_0-9]*                     {strcpy(yylval.str,yytext);return ID;}
[-*+/()={};]                                         {return yytext[0];}
[\n \t]                                                   {;}
%%

int yyerror(char *s)
{
            printf("%s",s);
}
int yywrap()
{
            return -1;
}

input.txt

a=b+c

/****************OUTPUT****************
admin1@ubuntu:~/Desktop$ yacc -d pmcd5.y
admin1@ubuntu:~/Desktop$ lex pmcd_5.l
admin1@ubuntu:~/Desktop$ gcc lex.yy.c y.tab.c
admin1@ubuntu:~/Desktop$ ./a.out

 a

 =

 b

 +

 c
 admin1@ubuntu:~/Desktop$
*/ 

 Assignment-6: : Implementation of Code generation using DAG / labeled tree
Code-gen.y

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
void yyerror(char *);
typedef struct Stack
{
            char s[2][3];
            int tp;
}stack;

stack rstack,tstack;
void push(char c,char a[]);
char * pop(char c);
char * top(char c);

typedef struct NODES
{
            char data[10];
            int label;
            struct NODES *left;
            struct NODES * right;
}NODE;
NODE * root=NULL, *temp=NULL, *p=NULL;
void inorder(NODE *);
void swap(char c);
void gencode(NODE *n,char c);
void labelling(NODE *);
NODE * createNode(char a[], NODE *,NODE *);
char R[3],T[3],a[3];
int i,j;
int nr=2;//no of available registers
%}
%union
{
char s[20];
struct NODES *n;
}
%token <s>ID
%type <n>E
%left '='
%left '+' '-'
%left '*' '/'

%%
S:E {root=$1; gencode(root,'g'); } 
 ;
E:E'+'E {$$=createNode("ADD",$1,$3); labelling($$);}
 |E'*'E {$$=createNode("MUL",$1,$3); labelling($$); }
 |E'-'E { $$=createNode("SUB",$1,$3); labelling($$);}
 |E'/'E { $$=createNode("DIv",$1,$3); labelling($$);}
 |'('E')' {$$=$2;}
 |ID { $$=createNode($1,NULL,NULL); }
 ;
%%
void main()
{
int g;
root=NULL;
temp=NULL;
p=NULL;
            j=1;
            rstack.tp=-1;
            tstack.tp=-1;
            push('r',"R1");
            push('r',"R0");

            push('t',"T1");
            push('t',"T0");

           
            yyparse();
            printf("\nThe syntax tree is:- \n");
            printf("\nNODE   LABEL");
            printf("\n=============");
            inorder(root);
            printf("\n");
}
void yyerror(char *s)
{         
                        printf("%s",s);
}

NODE * createNode(char a[],NODE * l,NODE * r)
{

  temp=(NODE *)malloc(sizeof(NODE));
  strcpy(temp->data,a);
  temp->left=l;
  temp->right=r;
  temp->label=0;
  return temp;
}

void inorder(NODE * a)
{
            if(a!=NULL)
            {
                        inorder(a->left);
                        printf("\n%s     %d", a->data,a->label);
                        inorder(a->right);
            }
}

void push(char c,char a[])
{
            if(c=='r')
            {
                        rstack.tp++;
                        strcpy(rstack.s[rstack.tp],a);
            }
            else
            {
                        tstack.tp++;
                        strcpy(tstack.s[tstack.tp],a);
            }
}

char * pop(char c)
{
            if(c=='r')
            {
                        strcpy(a,rstack.s[rstack.tp]);
                        strcpy(rstack.s[rstack.tp],"");
                        rstack.tp--;
            }
            else
            {
                        strcpy(a,tstack.s[tstack.tp]);
                        strcpy(tstack.s[tstack.tp],"");
                        tstack.tp--;
            }
            return a;
}

char * top(char c)
{
            if(c=='r')
            {
                        strcpy(a,rstack.s[rstack.tp]);
            }
            else
            {
                        strcpy(a,tstack.s[tstack.tp]);
            }
            return a;
}

void swap(char c)
{
            char t[3];
            if(c=='r')
            {
                        strcpy(t,rstack.s[rstack.tp]);
                        strcpy(rstack.s[rstack.tp],rstack.s[rstack.tp-1]);
                        strcpy(rstack.s[rstack.tp-1],t);
            }
            else
            {
                        strcpy(t,tstack.s[tstack.tp]);
                        strcpy(tstack.s[tstack.tp],tstack.s[tstack.tp-1]);
                        strcpy(tstack.s[tstack.tp-1],t);
            }
}

void labelling(NODE *a)
{
            NODE *d,*d1;
            d=a->left;
            d1=a->right;
            if(d->label==0)
                        a->left->label=1;
            if(d->label>d1->label)
                        a->label=d->label;
            else if(d->label<d1->label)
                        a->label=d1->label;
            else
                        a->label=d->label+1;
}

void gencode(NODE *n,char c)
{
            if((n->left==NULL) && (n->right==NULL))
            {
                        if(c=='l')
                        {
                                    printf("\nMOV %s, %s",n->data,top('r'));
                        }
            }         
            else if(n->right->label==0)
            {
                        gencode(n->left,'l');
                        printf("\n%s, %s, %s",n->data,n->right->data,top('r'));
            }
            else if((1<=n->left->label)&&(n->left->label<n->right->label)&&(n->left->label<nr))
            {
                        swap('r');
                        gencode(n->right,'r');
                        strcpy(R,pop('r'));
                        gencode(n->left,'l');
                        printf("\n%s %s, %s",n->data,R,top('r'));
                        push('r',R);
                        swap('r');
            }
            else if((1<=n->right->label)&&(n->right->label<=n->left->label)&&(n->right->label<nr))
            {
                        gencode(n->left,'l');
                        strcpy(R,pop('r'));
                        gencode(n->right,'r');
                        printf("\n%s %s, %s",n->data,top('r'),R);
                        push('r',R);
            }
            else
            {
                        gencode(n->right,'r');
                        strcpy(T,pop('t'));
                        printf("\nMOV %s, %s",top('r'),T);
                        gencode(n->left,'l');
                        printf("\n%s , %s , %s",n->data,T,top('r'));
                        push('t',T);
            }
}


Code-gen.l

%{
            #include "y.tab.h"      
            #include<string.h>
            #include<math.h>
%}
%%
[a-zA-Z] {strcpy(yylval.s,yytext); return ID;}
[-+*/()=] { return yytext[0];}
"$" {return 0;}
[\n] {return 0;} // return yytext[0];
%%
/*

/****************OUTPUT****************

(a+b)*(c-d)-(e/f+(m+n))

MOV e, R0
DIv, f, R0
MOV m, R1
ADD, n, R1
ADD R1, R0
MOV R0, T0
MOV a, R0
ADD, b, R0
MOV c, R1
SUB, d, R1
MUL R1, R0
SUB , T0 , R0
The syntax tree is:-

NODE   LABEL
=============
a          1
ADD   1
b          0
MUL   2
c          1
SUB    1
d          0
SUB    3
e          1
DIv      1
f           0
ADD   2
m         1
ADD   1
n          0
pccoecomp@STAFFPC:~$
*/