 |
Lexical Analysis in Compiler Design
Lexical analysis is the starting
point in compiler design. With this process given expression can be analyzed to
satisfy the required format so the expression is separated into its symbols and
every symbol's state is determined.
The Algorithm


C Source
Code
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <conio.h>
typedef struct t1tag
{
char nm[20];
} t1;
t1 Typs[] = {"keyword", "operator", "identifier", "number", "string", "illegal character",
"assignment", "paranthesis", "end of series", "relational operator",
"array start", "array end", "seperator", "Unary operator",
"compiler directive", "converter",
"range"};
// These are some reserved words in Pascal.
t1 Rsrv[] = {"BEGIN","END","IF","THEN","ELSE","DO","FOR","TO","WHILE","REPEAT",
"UNTIL","PROCEDURE","FUNCTION","INTEGER","REAL","BYTE","CHAR",
"TEXT", "ARRAY","VAR","RECORD","SET","TYPE","AND","CASE","CONST",
"DIV", "DOWNTO","EXTERNAL","FILE","FORWARD","GOTO","IN","INLINE",
"INTERRUPT", "LABEL","MOD","NIL","NOT","OF","OR","PACKED","PROGRAM",
"WITH", "BOOLEAN","INPUT","OUTPUT", "WRITE", "WRITELN" };
long s = 49;
char oprtr[] = { '.', ',', ';', '=', ':', '[', ']', '(', ')', '-', '+', '/', '*',
'>', '<', '$', '#', '@'};
long t = 18;
typedef struct ttyptg
{
long no;
char ctkn[50];
int typ;
long lline;
} ttyp;
int isReserved(char tmp[])
{
long i=0;
char *str;
str = _strupr( _strdup( tmp ) );
while ( (i < s) && ( strcmp(str, Rsrv[i].nm) != 0) )
{
i++;
}
if ( strcmp(str, Rsrv[i].nm) != 0) // not equal, not a keyword
return 1;
return 0;
}
void main(void)
{
long i, j, ind, zz, N, NN, ln;
char ch, nxt, instr[1024], tmp[100], FName[256];
ttyp Token[1024];
FILE *f;
printf("Enter a Pascal File Name: \n");
scanf("%s", FName);
f = fopen(FName,"r");
ln = 0;
NN = 0;
while (!feof(f))
{
N = 0;
ch = 0;
while ( ('\n' != (ch = getc(f))) && (!feof(f)) ) // end of line check
{
instr[N] = ch;
N++;
}
instr[N] = 0;
N++;
// TOKEN construction starts here
ind = 0;
while (ind < (N-1)) //
{
ch = instr[ind];
while (ch == 32) // space check
{
ind++;
ch = instr[ind];
}
if ( ind < N ) // if whole line has spaces, wil not be gone in.
{
// first 'operator' control will be done.
j = 0;
while( (j < t) && (ch != oprtr[j]) )
j++;
if ( ch == oprtr[j] ) // so ch is an operant.
{
zz = 0;
tmp[0] = ch;
if ( ind < N )
nxt = instr[ind + 1];
if ((nxt == '>') || (nxt == '=')) // can be one of "<>, <=, >=, :=" .
{
zz++;
tmp[1] = nxt;
ind +=2;
if ( nxt == '>' )
Token[NN].typ = 9; // relational operator
}
else if ( (ch == '.') && (nxt == '.'))
{
zz++;
tmp[1] = nxt;
ind +=2;
Token[NN].typ = 16; // range
}
else
ind++;
switch (ch)
{
case ':': if (nxt == '=')
Token[NN].typ = 6;
else
Token[NN].typ = 1;
break;
case '+':
case '-':
case '*':
case '/': Token[NN].typ = 1; break; // operator
case '.': if ( nxt != '.')
Token[NN].typ = 1; break; // operator
case ';': Token[NN].typ = 8; break; // end of series
case ',': Token[NN].typ = 12; break; // seperator
case '=':
case '>':
case '<': Token[NN].typ = 9; break; // relational oparator
case '[': Token[NN].typ = 10; break; // array start
case ']': Token[NN].typ = 11; break; // array end
case '(':
case ')': Token[NN].typ = 7; break; // paranthesis
case '$': Token[NN].typ = 14; break; // compiler directive
case '#': Token[NN].typ = 15; break; // ASCII converter
case '@': Token[NN].typ = 13; break; // unary operator
}
zz++;
tmp[zz] = 0;
Token[NN].no = NN;
strcpy(Token[NN].ctkn, tmp);
Token[NN].lline = ln;
NN++;
}
else if (ch == 39) // 39 = " ' " : String start
{
zz = 0;
ind++;
nxt = instr[ind];
tmp[0] = nxt;
while ( nxt != 39) // end of string
{
zz++;
ind++;
tmp[zz] = nxt = instr[ind];
}
tmp[zz] = 0;
Token[NN].no = NN;
strcpy(Token[NN].ctkn, tmp);
Token[NN].typ = 4;
Token[NN].lline = ln;
NN++;
ind++;
}
else if ( ((ch >= 65) && (ch <= 90)) || ((ch >= 97) &&
(ch <= 122)) || (ch == 95) )
// if ch is A..Z or a..z or '_' it is identifier or reserved word
{
zz = 0;
tmp[0] = ch;
nxt = instr[ind + 1];
zz++;
while ( ((nxt >= 65) && (nxt <= 90)) || ((nxt >= 97) &&
(nxt <= 122)) || (nxt == 95) || ((nxt >= 48) && (nxt <= 57)) )
// A..Z arasy, a..z arasy, '_', 0..9 arasy
{
ind++;
ch = instr[ind];
tmp[zz] = ch;
zz++;
nxt = instr[ind + 1];
}
tmp[zz] = 0;
if ( isReserved(tmp) == 0)
Token[NN].typ = 0; // keyword = reserved
else
Token[NN].typ = 2; // identifier
Token[NN].no = NN;
strcpy(Token[NN].ctkn, tmp);
Token[NN].lline = ln;
NN++;
ind++;
}
else if ( (ch >= 48) && (ch <= 57) ) // 0..9 : 'number'
{
zz = 0;
tmp[0] = ch;
nxt = instr[ind + 1];
zz++;
while ( ((nxt >= 48) && (nxt <= 57)) ) // take all 0..9 numbers.
{
ind++;
ch = instr[ind];
tmp[zz] = ch;
zz++;
nxt = instr[ind + 1];
}
tmp[zz] = 0;
Token[NN].typ = 3; // constant
Token[NN].no = NN;
strcpy(Token[NN].ctkn, tmp);
Token[NN].lline = ln;
NN++;
ind++;
}
else // illegal charater
{
Token[NN].no = NN;
Token[NN].ctkn[0] = ch;
Token[NN].ctkn[1] = 0;
Token[NN].typ = 5;
Token[NN].lline = ln;
NN++;
ind++;
} // if
} // if
} // while
ln++; // increase line number by one
} // while
fclose(f);
f = fopen("Token.txt", "w");
fprintf(f, "\n NO LINE TOKEN TIP\n");
fprintf(f, "______________________________________________________________________
____\n");
for(i=0;i<NN;i++)
{
fprintf(f, " %4ld %4ld %30s %20s\n",
Token[i].no, Token[i].lline, Token[i].ctkn, Typs[Token[i].typ].nm);
}
fclose(f);
}
SAMPLE EXECUTIONS
1)
Enter a
Pascal File Name: ASALBUL.PAS
uses crt;
var
i,j,k,n:integer; A, B:array[1..100] of integer;
BEGIN
clrscr;
n:= 100;
for i:=1
to n do A[i] := i;
i := 2;
j := 0;
while (i
< n) do
begin
j := j
+ 1;
B[j] :=
i;
k :=
i*i;
while
(k < n) do
begin
A[k]
:= 0;
k :=
k+i;
end;
i :=
i+1;
while
((A[i] = 0) and (i < n)) do i := i+1;
end;
writeln('1..50 Arasi Asal Sayilar:');
writeln('Asal Sayi Adedi:',j);
for i:=1 to
j do write(B[i],' ');
readln;
END.
Output
NO
LINE TOKEN TIP
____________________________________________
0
0 uses identifier
1
0 crt identifier
2
0 ; end of series
3
1 var keyword
4
1 i identifier
5
1 , seperator
6
1 j identifier
7
1 , seperator
8 1
k identifier
9
1 , seperator
10
1 n identifier
11
1 : operator
12
1 integer keyword
13
1 ; end of series
14
1 A identifier
15
1 , seperator
16
1 B identifier
17
1 : operator
18
1 array keyword
19
1 [ array start
20
1 1 number
21
1 .. range
22
1 100 number
23
1 ] array end
24
1 of keyword
25
1 integer keyword
26
1 ; end of series
27
3 BEGIN keyword
28
4 clrscr identifier
29
4 ; end of series
30
5 n identifier
31
5 := assignment
32
5 100 number
33
5 ; end of series
34
6 for keyword
35
6 i identifier
36
6 := assignment
37
6 1 number
38
6 to keyword
39
6 n identifier
40
6 do keyword
41
6 A identifier
42
6 [ array start
43
6 i identifier
44
6 ] array end
45
6 := assignment
46
6 i identifier
47
6 ; end of series
48
7 i identifier
49
7 := assignment
50
7 2 number
51
7 ; end of series
52
8 j identifier
53 8
:= assignment
54
8 0 number
55
8 ; end of series
56
9 while keyword
57
9 ( paranthesis
58
9 i identifier
59
9 < relational operator
60
9 n identifier
61
9 ) paranthesis
62
9 do keyword
63
10 begin keyword
64
11 j identifier
65
11 := assignment
66
11 j identifier
67
11 + operator
68
11 1 number
69
11 ; end of series
70
12 B identifier
71
12 [ array start
72
12 j identifier
73
12 ] array end
74
12 := assignment
75
12 i identifier
76
12 ; end of series
77
13 k identifier
78
13 := assignment
79
13 i identifier
80
13 * operator
81
13 i identifier
82
13 ; end of series
83
14 while keyword
84
14 ( paranthesis
85
14 k identifier
86
14 < relational operator
87
14 n identifier
88
14 ) paranthesis
89
14 do keyword
90
15 begin keyword
91
16 A identifier
92
16 [ array start
93
16 k identifier
94
16 ] array end
95
16 := assignment
96
16 0 number
97
16 ; end of series
98 17
k identifier
99
17 := assignment
100
17 k identifier
101
17 + operator
102
17 i identifier
103
17 ; end of series
104
18 end keyword
105
18 ; end of series
106
19 i identifier
107
19 := assignment
108
19 i identifier
109
19 + operator
110
19 1 number
111
19 ; end of series
112
20 while keyword
113
20 ( paranthesis
114
20 ( paranthesis
115
20 A identifier
116
20 [ array start
117
20 i identifier
118
20 ] array end
119
20 = relational operator
120
20 0 number
121
20 ) paranthesis
122
20 and keyword
123
20 ( paranthesis
124
20 i identifier
125
20 < relational operator
126
20 n identifier
127
20 ) paranthesis
128
20 ) paranthesis
129
20 do keyword
130
20 i identifier
131
20 := assignment
132
20 i identifier
133
20 + operator
134
20 1 number
135
20 ; end of series
136 21
end keyword
137
21 ; end of series
138
22 writeln keyword
139
22 ( paranthesis
140
22 1..50 Arasi Asal Sayilar: string
141
22 ) paranthesis
142
22 ; end of series
143
23 writeln keyword
144
23 ( paranthesis
145
23 Asal Sayi Adedi: string
146
23 , seperator
147
23 j identifier
148
23 ) paranthesis
149
23 ; end of series
150
24 for keyword
151
24 i identifier
152
24 := assignment
153
24 1 number
154
24 to keyword
155
24 j identifier
156
24 do keyword
157
24 write keyword
158
24 ( paranthesis
159
24 B identifier
160
24 [ array start
161
24 i identifier
162
24 ] array end
163
24 , seperator
164
24 string
165
24 ) paranthesis
166
24 ; end of series
167
25 readln identifier
168
25 ; end of series
169
26 END keyword
170
26 . operator
2)
Enter a
Pascal File Name: ASALBUL.PAS
uses crt;
var
A, S:array
[1..20] of integer;
i, n, k,
say, l, z:integer;
ch : char;
begin
clrscr;
write('Kümenin
eleman sayisi:');readln(n);
writeln('Küme
elemanlari:');
for i:=1
to n do read(a[i]);
z:=2;
for i:=1
to n-1 do z:=z*2;
for i:=1
to n do s[i]:=0;
i:=0;
while
i<>z do
begin
i:=i+1;
s[n]:=s[n]+1;
for
k:=n downto 1 do
if
s[k]>1 then
begin
s[k]:=s[k]-2;
s[k-1]:=s[k-1]+1;
end
else
k:=1;
if
i<>z then
write(i:3,'. alt küme = ');
for
l:=1 to n do
begin
if
s[l]=1 then
write(a[l]:3);
if
wherey > 24 then
begin
repeat ch:=readkey until ch=#32;
clrscr;
end;
end;
writeln;
if
wherey > 24 then
begin
repeat ch:=readkey until ch=#32;
clrscr;
end;
end;
readkey;
end.
Output
NO
LINE TOKEN TIP
____________________________________________
0
0 uses identifier
1
0 crt identifier
2
0 ; end of series
3
1 var keyword
4
2 A identifier
5
2 , seperator
6
2 S identifier
7
2 : operator
8
2 array keyword
9
2 [ array start
10
2 1 number
11
2 .. range
12
2 20 number
13
2 ] array end
14
2 of keyword
15
2 integer keyword
16
2 ; end of series
17
3 i identifier
18
3 , seperator
19
3 n identifier
20 3
, seperator
21
3 k identifier
22
3 , seperator
23
3 say identifier
24
3 , seperator
25
3 l identifier
26
3 , seperator
27
3 z identifier
28
3 : operator
29
3 integer keyword
30
3 ; end of series
31
4 ch identifier
32
4 : operator
33
4 char keyword
34
4 ; end of series
35
5 begin keyword
36
6 clrscr identifier
37
6 ; end of series
38
7 write keyword
39
7 ( paranthesis
40
7 Kümenin eleman sayisi: string
41
7 ) paranthesis
42
7 ; end of series
43
7 readln identifier
44
7 ( paranthesis
45
7 n identifier
46
7 ) paranthesis
47
7 ; end of series
48
8 writeln keyword
49
8 ( paranthesis
50
8 Küme elemanlari: string
51
8 ) paranthesis
52
8 ; end of series
53
9 for keyword
54
9 i identifier
55
9 := assignment
56
9 1 number
57
9 to keyword
58
9 n identifier
59
9 do keyword
60
9 read identifier
61
9 ( paranthesis
62
9 a identifier
63
9 [ array start
64
9 i identifier
65 9
] array end
66
9 ) paranthesis
67
9 ; end of series
68
10 z identifier
69
10 := assignment
70
10 2 number
71
10 ; end of series
72
11 for keyword
73
11 i identifier
74
11 := assignment
75
11 1 number
76
11 to keyword
77
11 n identifier
78
11 - operator
79
11 1 number
80
11 do keyword
81
11 z identifier
82
11 := assignment
83
11 z identifier
84
11 * operator
85
11 2 number
86
11 ; end of series
87
12 for keyword
88
12 i identifier
89
12 := assignment
90
12 1 number
91
12 to keyword
92
12 n identifier
93
12 do keyword
94
12 s identifier
95
12 [ array start
96
12 i identifier
97
12 ] array end
98
12 := assignment
99
12 0 number
100
12 ; end of series
101
13 i identifier
102
13 := assignment
103
13 0 number
104
13 ; end of series
105
14 while keyword
106
14 i identifier
107
14 <> relational operator
108
14 z identifier
109
14 do keyword
110 15
begin keyword
111
16 i identifier
112
16 := assignment
113
16 i identifier
114
16 + operator
115
16 1 number
116
16 ; end of series
117
17 s identifier
118
17 [ array start
119
17 n identifier
120
17 ] array end
121
17 := assignment
122
17 s identifier
123
17 [ array start
124
17 n identifier
125
17 ] array end
126
17 + operator
127
17 1 number
128
17 ; end of series
129
18 for keyword
130
18 k identifier
131
18 := assignment
132
18 n identifier
133
18 downto keyword
134
18 1 number
135
18 do keyword
136
19 if keyword
137
19 s identifier
138
19 [ array start
139
19 k identifier
140
19 ]> array end
141
19 1 number
142
19 then keyword
143
20 begin keyword
144
21 s identifier
145
21 [ array start
146
21 k identifier
147
21 ] array end
148 21
:= assignment
149
21 s identifier
150
21 [ array start
151
21 k identifier
152
21 ] array end
153
21 - operator
154
21 2 number
155
21 ; end of series
156
22 s identifier
157
22 [ array start
158
22 k identifier
159
22 - operator
160
22 1 number
161
22 ] array end
162
22 := assignment
163
22 s identifier
164
22 [ array start
165
22 k identifier
166
22 - operator
167
22 1 number
168
22 ] array end
169
22 + operator
170
22 1 number
171
22 ; end of series
172
23 end keyword
173
24 else keyword
174
25 k identifier
175
25 := assignment
176
25 1 number
177
25 ; end of series
178
26 if keyword
179
26 i identifier
180
26 <> relational operator
181
26 z identifier
182
26 then keyword
183
27 write keyword
184
27 ( paranthesis
185
27 i identifier
186
27 : operator
187
27 3 number
188
27 , seperator
189
27 . alt küme = string
190
27 ) paranthesis
191
27 ; end of series
192
28 for keyword
193 28
l identifier
194
28 := assignment
195
28 1 number
196
28 to keyword
197
28 n identifier
198
28 do keyword
199
29 begin keyword
200
30 if keyword
201
30 s identifier
202
30 [ array start
203
30 l identifier
204
30 ]= array end
205
30 1 number
206
30 then keyword
207
31 write keyword
208
31 ( paranthesis
209
31 a identifier
210
31 [ array start
211
31 l identifier
212
31 ] array end
213
31 : operator
214
31 3 number
215
31 ) paranthesis
216
31 ; end of series
217
32 if keyword
218
32 wherey identifier
219
32 > relational operator
220
32 24 number
221
32 then keyword
222
33 begin keyword
223
34 repeat keyword
224
34 ch identifier
225
34 := assignment
226
34 readkey identifier
227
34 until keyword
228
34 ch identifier
229
34 = relational operator
230
34 # converter
231
34 32 number
232
34 ; end of series
233
35 clrscr identifier
234
35 ; end of series
235
36 end keyword
236
36 ; end of series
237
37 end keyword
238 37
; end of series
239
38 writeln keyword
240
38 ; end of series
241
39 if keyword
242
39 wherey identifier
243
39 > relational operator
244
39 24 number
245
39 then keyword
246
40 begin keyword
247
41 repeat keyword
248
41 ch identifier
249
41 := assignment
250
41 readkey identifier
251
41 until keyword
252
41 ch identifier
253
41 = relational operator
254
41 # converter
255
41 32 number
256
41 ; end of series
257
42 clrscr identifier
258
42 ; end of series
259
43 end keyword
260
43 ; end of series
261
44 end keyword
262
44 ; end of series
263
45 readkey identifier
264
45 ; end of series
265
46 end keyword
266
46 . operator
|
 |