|
Lexical analysis is the starting point in compiler design. With this process, given expression can be analyzed to satisfy the required format so the expression is separated into its symbols and the state of the symbol can be determined.
The Algorithm 

C Source Code
#include <stdlib.h> #include <stdio.h> #include <string.h> #include <conio.h> typedef struct t1tag { char nm[20]; } t1; t1 Typs[] = {"keyword", "operator", "identifier", "number", "string", "illegal character", "assignment", "paranthesis", "end of series", "relational operator", "array start", "array end", "seperator", "Unary operator", "compiler directive", "converter", "range"}; // These are some reserved words in Pascal. t1 Rsrv[] = {"BEGIN","END","IF","THEN","ELSE","DO","FOR","TO","WHILE","REPEAT", "UNTIL","PROCEDURE","FUNCTION","INTEGER","REAL","BYTE","CHAR", "TEXT", "ARRAY","VAR","RECORD","SET","TYPE","AND","CASE","CONST", "DIV", "DOWNTO","EXTERNAL","FILE","FORWARD","GOTO","IN","INLINE", "INTERRUPT", "LABEL","MOD","NIL","NOT","OF","OR","PACKED","PROGRAM", "WITH", "BOOLEAN","INPUT","OUTPUT", "WRITE", "WRITELN" }; long s = 49; char oprtr[] = { '.', ',', ';', '=', ':', '[', ']', '(', ')', '-', '+', '/', '*', '>', '<', '$', '#', '@'}; long t = 18; typedef struct ttyptg { long no; char ctkn[50]; int typ; long lline; } ttyp; int isReserved(char tmp[]) { long i=0; char *str; str = _strupr( _strdup( tmp ) ); while ( (i < s) && ( strcmp(str, Rsrv[i].nm) != 0) ) { i++; } if ( strcmp(str, Rsrv[i].nm) != 0) // not equal, not a keyword return 1;
return 0; } void main(void) { long i, j, ind, zz, N, NN, ln; char ch, nxt, instr[1024], tmp[100], FName[256]; ttyp Token[1024]; FILE *f;
printf("Enter a Pascal File Name: \n"); scanf("%s", FName);
f = fopen(FName,"r");
ln = 0; NN = 0; while (!feof(f)) { N = 0; ch = 0; while ( ('\n' != (ch = getc(f))) && (!feof(f)) ) // end of line check { instr[N] = ch; N++; } instr[N] = 0; N++;
// TOKEN construction starts here ind = 0; while (ind < (N-1)) // { ch = instr[ind]; while (ch == 32) // space check { ind++; ch = instr[ind]; } if ( ind < N ) // if whole line has spaces, wil not be gone in. { // first 'operator' control will be done. j = 0; while( (j < t) && (ch != oprtr[j]) ) j++; if ( ch == oprtr[j] ) // so ch is an operant. { zz = 0; tmp[0] = ch; if ( ind < N ) nxt = instr[ind + 1]; if ((nxt == '>') || (nxt == '=')) // can be one of "<>, <=, >=, :=" . { zz++; tmp[1] = nxt; ind +=2; if ( nxt == '>' ) Token[NN].typ = 9; // relational operator } else if ( (ch == '.') && (nxt == '.')) { zz++; tmp[1] = nxt; ind +=2; Token[NN].typ = 16; // range } else ind++; switch (ch) { case ':': if (nxt == '=') Token[NN].typ = 6; else Token[NN].typ = 1; break; case '+': case '-': case '*': case '/': Token[NN].typ = 1; break; // operator case '.': if ( nxt != '.') Token[NN].typ = 1; break; // operator case ';': Token[NN].typ = 8; break; // end of series case ',': Token[NN].typ = 12; break; // seperator case '=': case '>': case '<': Token[NN].typ = 9; break; // relational oparator case '[': Token[NN].typ = 10; break; // array start case ']': Token[NN].typ = 11; break; // array end case '(': case ')': Token[NN].typ = 7; break; // paranthesis case '$': Token[NN].typ = 14; break; // compiler directive case '#': Token[NN].typ = 15; break; // ASCII converter case '@': Token[NN].typ = 13; break; // unary operator } zz++; tmp[zz] = 0; Token[NN].no = NN; strcpy(Token[NN].ctkn, tmp); Token[NN].lline = ln; NN++; } else if (ch == 39) // 39 = " ' " : String start { zz = 0; ind++; nxt = instr[ind]; tmp[0] = nxt; while ( nxt != 39) // end of string { zz++; ind++; tmp[zz] = nxt = instr[ind]; } tmp[zz] = 0; Token[NN].no = NN; strcpy(Token[NN].ctkn, tmp); Token[NN].typ = 4; Token[NN].lline = ln; NN++; ind++; } else if ( ((ch >= 65) && (ch <= 90)) || ((ch >= 97) && (ch <= 122)) || (ch == 95) ) // if ch is A..Z or a..z or '_' it is identifier or reserved word { zz = 0; tmp[0] = ch; nxt = instr[ind + 1]; zz++; while ( ((nxt >= 65) && (nxt <= 90)) || ((nxt >= 97) && (nxt <= 122)) || (nxt == 95) || ((nxt >= 48) && (nxt <= 57)) ) // A..Z arasy, a..z arasy, '_', 0..9 arasy { ind++; ch = instr[ind]; tmp[zz] = ch; zz++; nxt = instr[ind + 1]; } tmp[zz] = 0; if ( isReserved(tmp) == 0) Token[NN].typ = 0; // keyword = reserved else Token[NN].typ = 2; // identifier Token[NN].no = NN; strcpy(Token[NN].ctkn, tmp); Token[NN].lline = ln; NN++; ind++; } else if ( (ch >= 48) && (ch <= 57) ) // 0..9 : 'number' { zz = 0; tmp[0] = ch; nxt = instr[ind + 1]; zz++; while ( ((nxt >= 48) && (nxt <= 57)) ) // take all 0..9 numbers. { ind++; ch = instr[ind]; tmp[zz] = ch; zz++; nxt = instr[ind + 1]; } tmp[zz] = 0; Token[NN].typ = 3; // constant Token[NN].no = NN; strcpy(Token[NN].ctkn, tmp); Token[NN].lline = ln; NN++; ind++; } else // illegal charater { Token[NN].no = NN; Token[NN].ctkn[0] = ch; Token[NN].ctkn[1] = 0; Token[NN].typ = 5; Token[NN].lline = ln; NN++; ind++; } // if } // if } // while ln++; // increase line number by one } // while
fclose(f);
f = fopen("Token.txt", "w"); fprintf(f, "\n NO LINE TOKEN TIP\n"); fprintf(f, "______________________________________________________________________ ____\n"); for(i=0;i<NN;i++) { fprintf(f, " %4ld %4ld %30s %20s\n", Token[i].no, Token[i].lline, Token[i].ctkn, Typs[Token[i].typ].nm); } fclose(f); }
Sample Executions 1) Enter a Pascal File Name: ASALBUL.PAS uses crt; var i,j,k,n:integer; A, B:array[1..100] of integer; BEGIN clrscr; n:= 100; for i:=1 to n do A[i] := i; i := 2; j := 0; while (i < n) do begin j := j + 1; B[j] := i; k := i*i; while (k < n) do begin A[k] := 0; k := k+i; end; i := i+1; while ((A[i] = 0) and (i < n)) do i := i+1; end; writeln('1..50 Arasi Asal Sayilar:'); writeln('Asal Sayi Adedi:',j); for i:=1 to j do write(B[i],' '); readln; END. Output NO LINE TOKEN TIP ____________________________________________ 0 0 uses identifier 1 0 crt identifier 2 0 ; end of series 3 1 var keyword 4 1 i identifier 5 1 , seperator 6 1 j identifier 7 1 , seperator 8 1 k identifier 9 1 , seperator 10 1 n identifier 11 1 : operator 12 1 integer keyword 13 1 ; end of series 14 1 A identifier 15 1 , seperator 16 1 B identifier 17 1 : operator 18 1 array keyword 19 1 [ array start 20 1 1 number 21 1 .. range 22 1 100 number 23 1 ] array end 24 1 of keyword 25 1 integer keyword 26 1 ; end of series 27 3 BEGIN keyword 28 4 clrscr identifier 29 4 ; end of series 30 5 n identifier 31 5 := assignment 32 5 100 number 33 5 ; end of series 34 6 for keyword 35 6 i identifier 36 6 := assignment 37 6 1 number 38 6 to keyword 39 6 n identifier 40 6 do keyword 41 6 A identifier 42 6 [ array start 43 6 i identifier 44 6 ] array end 45 6 := assignment 46 6 i identifier 47 6 ; end of series 48 7 i identifier 49 7 := assignment 50 7 2 number 51 7 ; end of series 52 8 j identifier 53 8 := assignment 54 8 0 number 55 8 ; end of series 56 9 while keyword 57 9 ( paranthesis 58 9 i identifier 59 9 < relational operator 60 9 n identifier 61 9 ) paranthesis 62 9 do keyword 63 10 begin keyword 64 11 j identifier 65 11 := assignment 66 11 j identifier 67 11 + operator 68 11 1 number 69 11 ; end of series 70 12 B identifier 71 12 [ array start 72 12 j identifier 73 12 ] array end 74 12 := assignment 75 12 i identifier 76 12 ; end of series 77 13 k identifier 78 13 := assignment 79 13 i identifier 80 13 * operator 81 13 i identifier 82 13 ; end of series 83 14 while keyword 84 14 ( paranthesis 85 14 k identifier 86 14 < relational operator 87 14 n identifier 88 14 ) paranthesis 89 14 do keyword 90 15 begin keyword 91 16 A identifier 92 16 [ array start 93 16 k identifier 94 16 ] array end 95 16 := assignment 96 16 0 number 97 16 ; end of series 98 17 k identifier 99 17 := assignment 100 17 k identifier 101 17 + operator 102 17 i identifier 103 17 ; end of series 104 18 end keyword 105 18 ; end of series 106 19 i identifier 107 19 := assignment 108 19 i identifier 109 19 + operator 110 19 1 number 111 19 ; end of series 112 20 while keyword 113 20 ( paranthesis 114 20 ( paranthesis 115 20 A identifier 116 20 [ array start 117 20 i identifier 118 20 ] array end 119 20 = relational operator 120 20 0 number 121 20 ) paranthesis 122 20 and keyword 123 20 ( paranthesis 124 20 i identifier 125 20 < relational operator 126 20 n identifier 127 20 ) paranthesis 128 20 ) paranthesis 129 20 do keyword 130 20 i identifier 131 20 := assignment 132 20 i identifier 133 20 + operator 134 20 1 number 135 20 ; end of series 136 21 end keyword 137 21 ; end of series 138 22 writeln keyword 139 22 ( paranthesis 140 22 1..50 Arasi Asal Sayilar: string 141 22 ) paranthesis 142 22 ; end of series 143 23 writeln keyword 144 23 ( paranthesis 145 23 Asal Sayi Adedi: string 146 23 , seperator 147 23 j identifier 148 23 ) paranthesis 149 23 ; end of series 150 24 for keyword 151 24 i identifier 152 24 := assignment 153 24 1 number 154 24 to keyword 155 24 j identifier 156 24 do keyword 157 24 write keyword 158 24 ( paranthesis 159 24 B identifier 160 24 [ array start 161 24 i identifier 162 24 ] array end 163 24 , seperator 164 24 string 165 24 ) paranthesis 166 24 ; end of series 167 25 readln identifier 168 25 ; end of series 169 26 END keyword 170 26 . operator 2) Enter a Pascal File Name: ASALBUL.PAS uses crt; var A, S:array [1..20] of integer; i, n, k, say, l, z:integer; ch : char; begin clrscr; write('Kümenin eleman sayisi:');readln(n); writeln('Küme elemanlari:'); for i:=1 to n do read(a[i]); z:=2; for i:=1 to n-1 do z:=z*2; for i:=1 to n do s[i]:=0; i:=0; while i<>z do begin i:=i+1; s[n]:=s[n]+1; for k:=n downto 1 do if s[k]>1 then begin s[k]:=s[k]-2; s[k-1]:=s[k-1]+1; end else k:=1; if i<>z then write(i:3,'. alt küme = '); for l:=1 to n do begin if s[l]=1 then write(a[l]:3); if wherey > 24 then begin repeat ch:=readkey until ch=#32; clrscr; end; end; writeln; if wherey > 24 then begin repeat ch:=readkey until ch=#32; clrscr; end; end; readkey; end. Output NO LINE TOKEN TIP ____________________________________________ 0 0 uses identifier 1 0 crt identifier 2 0 ; end of series 3 1 var keyword 4 2 A identifier 5 2 , seperator 6 2 S identifier 7 2 : operator 8 2 array keyword 9 2 [ array start 10 2 1 number 11 2 .. range 12 2 20 number 13 2 ] array end 14 2 of keyword 15 2 integer keyword 16 2 ; end of series 17 3 i identifier 18 3 , seperator 19 3 n identifier 20 3 , seperator 21 3 k identifier 22 3 , seperator 23 3 say identifier 24 3 , seperator 25 3 l identifier 26 3 , seperator 27 3 z identifier 28 3 : operator 29 3 integer keyword 30 3 ; end of series 31 4 ch identifier 32 4 : operator 33 4 char keyword 34 4 ; end of series 35 5 begin keyword 36 6 clrscr identifier 37 6 ; end of series 38 7 write keyword 39 7 ( paranthesis 40 7 Kümenin eleman sayisi: string 41 7 ) paranthesis 42 7 ; end of series 43 7 readln identifier 44 7 ( paranthesis 45 7 n identifier 46 7 ) paranthesis 47 7 ; end of series 48 8 writeln keyword 49 8 ( paranthesis 50 8 Küme elemanlari: string 51 8 ) paranthesis 52 8 ; end of series 53 9 for keyword 54 9 i identifier 55 9 := assignment 56 9 1 number 57 9 to keyword 58 9 n identifier 59 9 do keyword 60 9 read identifier 61 9 ( paranthesis 62 9 a identifier 63 9 [ array start 64 9 i identifier 65 9 ] array end 66 9 ) paranthesis 67 9 ; end of series 68 10 z identifier 69 10 := assignment 70 10 2 number 71 10 ; end of series 72 11 for keyword 73 11 i identifier 74 11 := assignment 75 11 1 number 76 11 to keyword 77 11 n identifier 78 11 - operator 79 11 1 number 80 11 do keyword 81 11 z identifier 82 11 := assignment 83 11 z identifier 84 11 * operator 85 11 2 number 86 11 ; end of series 87 12 for keyword 88 12 i identifier 89 12 := assignment 90 12 1 number 91 12 to keyword 92 12 n identifier 93 12 do keyword 94 12 s identifier 95 12 [ array start 96 12 i identifier 97 12 ] array end 98 12 := assignment 99 12 0 number 100 12 ; end of series 101 13 i identifier 102 13 := assignment 103 13 0 number 104 13 ; end of series 105 14 while keyword 106 14 i identifier 107 14 <> relational operator 108 14 z identifier 109 14 do keyword 110 15 begin keyword 111 16 i identifier 112 16 := assignment 113 16 i identifier 114 16 + operator 115 16 1 number 116 16 ; end of series 117 17 s identifier 118 17 [ array start 119 17 n identifier 120 17 ] array end 121 17 := assignment 122 17 s identifier 123 17 [ array start 124 17 n identifier 125 17 ] array end 126 17 + operator 127 17 1 number 128 17 ; end of series 129 18 for keyword 130 18 k identifier 131 18 := assignment 132 18 n identifier 133 18 downto keyword 134 18 1 number 135 18 do keyword 136 19 if keyword 137 19 s identifier 138 19 [ array start 139 19 k identifier 140 19 ]> array end 141 19 1 number 142 19 then keyword 143 20 begin keyword 144 21 s identifier 145 21 [ array start 146 21 k identifier 147 21 ] array end 148 21 := assignment 149 21 s identifier 150 21 [ array start 151 21 k identifier 152 21 ] array end 153 21 - operator 154 21 2 number 155 21 ; end of series 156 22 s identifier 157 22 [ array start 158 22 k identifier 159 22 - operator 160 22 1 number 161 22 ] array end 162 22 := assignment 163 22 s identifier 164 22 [ array start 165 22 k identifier 166 22 - operator 167 22 1 number 168 22 ] array end 169 22 + operator 170 22 1 number 171 22 ; end of series 172 23 end keyword 173 24 else keyword 174 25 k identifier 175 25 := assignment 176 25 1 number 177 25 ; end of series 178 26 if keyword 179 26 i identifier 180 26 <> relational operator 181 26 z identifier 182 26 then keyword 183 27 write keyword 184 27 ( paranthesis 185 27 i identifier 186 27 : operator 187 27 3 number 188 27 , seperator 189 27 . alt küme = string 190 27 ) paranthesis 191 27 ; end of series 192 28 for keyword 193 28 l identifier 194 28 := assignment 195 28 1 number 196 28 to keyword 197 28 n identifier 198 28 do keyword 199 29 begin keyword 200 30 if keyword 201 30 s identifier 202 30 [ array start 203 30 l identifier 204 30 ]= array end 205 30 1 number 206 30 then keyword 207 31 write keyword 208 31 ( paranthesis 209 31 a identifier 210 31 [ array start 211 31 l identifier 212 31 ] array end 213 31 : operator 214 31 3 number 215 31 ) paranthesis 216 31 ; end of series 217 32 if keyword 218 32 wherey identifier 219 32 > relational operator 220 32 24 number 221 32 then keyword 222 33 begin keyword 223 34 repeat keyword 224 34 ch identifier 225 34 := assignment 226 34 readkey identifier 227 34 until keyword 228 34 ch identifier 229 34 = relational operator 230 34 # converter 231 34 32 number 232 34 ; end of series 233 35 clrscr identifier 234 35 ; end of series 235 36 end keyword 236 36 ; end of series 237 37 end keyword 238 37 ; end of series 239 38 writeln keyword 240 38 ; end of series 241 39 if keyword 242 39 wherey identifier 243 39 > relational operator 244 39 24 number 245 39 then keyword 246 40 begin keyword 247 41 repeat keyword 248 41 ch identifier 249 41 := assignment 250 41 readkey identifier 251 41 until keyword 252 41 ch identifier 253 41 = relational operator 254 41 # converter 255 41 32 number 256 41 ; end of series 257 42 clrscr identifier 258 42 ; end of series 259 43 end keyword 260 43 ; end of series 261 44 end keyword 262 44 ; end of series 263 45 readkey identifier 264 45 ; end of series 265 46 end keyword 266 46 . operator
|