fajarlabs/contoh_lexical.c

## contoh_lexical.c
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <conio.h>

/* One fixed-width text entry: room for 19 characters plus the terminator. */
struct t1tag {
    char nm[20];
};
typedef struct t1tag t1;

t1 Typs[] = {"keyword", "operator", "identifier", "number", "string", "illegal character",
    "assignment", "paranthesis", "end of series", "relational operator",
    "array start", "array end", "seperator", "Unary operator",
"compiler directive", "converter", "range"};

// Ini adalah beberapa kata-kata dicadangkan di Pascal.
t1 Rsrv[] = {"BEGIN","END","IF","THEN","ELSE","DO","FOR","TO","WHILE","REPEAT",
    "UNTIL","PROCEDURE","FUNCTION","INTEGER","REAL","BYTE","CHAR",
    "TEXT", "ARRAY","VAR","RECORD","SET","TYPE","AND","CASE","CONST",
    "DIV", "DOWNTO","EXTERNAL","FILE","FORWARD","GOTO","IN","INLINE",
    "INTERRUPT", "LABEL","MOD","NIL","NOT","OF","OR","PACKED","PROGRAM",
"WITH", "BOOLEAN","INPUT","OUTPUT", "WRITE", "WRITELN" };

long s = 49;

// These are the characters that start an operator or punctuation token.
char oprtr[] = {
    '.', ',', ';', '=', ':', '[', ']', '(', ')',
    '-', '+', '/', '*', '>', '<', '$', '#', '@'
};
// Number of entries in oprtr; computed from the array so the two stay in sync.
long t = (long)(sizeof oprtr / sizeof oprtr[0]);
/* One scanned token as it is recorded in the token table. */
struct ttyptg {
    long no;       /* sequence number of the token */
    char ctkn[50]; /* token text, NUL-terminated */
    int typ;       /* token category code */
    long lline;    /* number of the line the token was found on */
};
typedef struct ttyptg ttyp;

/*
 * Looks a word up in the Rsrv keyword table, ignoring letter case.
 *
 * tmp: NUL-terminated word to check; it is not modified.
 *
 * Returns 0 when the word IS a reserved word and 1 when it is not. The sense
 * is the opposite of what the name suggests; callers test the result
 * against 0.
 */
int isReserved(char tmp[])
{
    long i;

    for (i = 0; i < s; i++)
    {
        const char *word = tmp;
        const char *key = Rsrv[i].nm;

        /* Rsrv entries are upper case, so only the candidate is folded.
           Comparing in place needs no temporary copy, so nothing can leak. */
        while (*word != '\0' && toupper((unsigned char)*word) == (unsigned char)*key)
        {
            word++;
            key++;
        }
        if (*word == '\0' && *key == '\0')
            return 0; // reserved word
    }

    /* The table is never indexed at i == s, unlike a post-loop re-check. */
    return 1; // not a keyword
}

// RUN PROGRAM
void main(void)
{
    long i, j, ind, zz, N, NN, ln;
    char ch, nxt, instr[1024], tmp[100], FName[256];
    ttyp Token[1024];
    FILE *f;

    printf("Enter a Pascal File Name: n");
    scanf("%s", FName);

    f = fopen(FName,"r");

    ln = 0;
    NN = 0;
    while (!feof(f))
    {
        N = 0;
        ch = 0;
        while ( ('n' != (ch = getc(f))) && (!feof(f)) ) // end of line check
        {
            instr[N] = ch;
            N++;
        }
        instr[N] = 0;
        N++;

        //  TOKEN construction starts here
        ind = 0;
        while (ind < (N-1))  //
        {
            ch = instr[ind];
            while (ch == 32) // space check
            {
                ind++;
                ch = instr[ind];
            }
            if ( ind < N ) // if whole line has spaces, wil not be gone in.
            {
                // first 'operator' control will be done.
                j = 0;
                while( (j < t) && (ch != oprtr[j]) )
                j++;
                if ( ch == oprtr[j] ) // so ch is an operant.
                {
                    zz = 0;
                    tmp[0] = ch;
                    if ( ind < N )
                    nxt = instr[ind + 1];
                    if ((nxt == '>') || (nxt == '=')) // can be one of "<>, <=, >=, :=" .
                    {
                        zz++;
                        tmp[1] = nxt;
                        ind +=2;
                        if ( nxt == '>' )
                        Token[NN].typ = 9; // relational operator
                    }
                    else if ( (ch == '.') && (nxt == '.'))
                    {
                        zz++;
                        tmp[1] = nxt;
                        ind +=2;
                        Token[NN].typ = 16; // range
                    }
                    else
                    ind++;
                    switch (ch)
                    {
                        case ':': if (nxt == '=')
                        Token[NN].typ = 6;
                        else
                        Token[NN].typ = 1;
                        break;
                        case '+':
                        case '-':
                        case '*':
                        case '/': Token[NN].typ = 1; break; // operator
                        case '.': if ( nxt != '.')
                        Token[NN].typ = 1; break; // operator
                        case ';': Token[NN].typ = 8; break; // end of series
                        case ',': Token[NN].typ = 12; break; // seperator
                        case '=':
                        case '>':
                        case '<': Token[NN].typ = 9; break; // relational oparator
                        case '[': Token[NN].typ = 10; break; // array start
                        case ']': Token[NN].typ = 11; break; // array end
                        case '(':
                        case ')': Token[NN].typ = 7; break; // paranthesis
                        case '$': Token[NN].typ = 14; break; // compiler directive
                        case '#': Token[NN].typ = 15; break; // ASCII converter
                        case '@': Token[NN].typ = 13; break; // unary operator
                    }
                    zz++;
                    tmp[zz] = 0;
                    Token[NN].no = NN;
                    strcpy(Token[NN].ctkn, tmp);
                    Token[NN].lline = ln;
                    NN++;
                }
                else if (ch == 39) // 39 = " ' " : String start
                {
                    zz = 0;
                    ind++;
                    nxt = instr[ind];
                    tmp[0] = nxt;
                    while ( nxt != 39) // end of string
                    {
                        zz++;
                        ind++;
                        tmp[zz] = nxt = instr[ind];
                    }
                    tmp[zz] = 0;
                    Token[NN].no = NN;
                    strcpy(Token[NN].ctkn, tmp);
                    Token[NN].typ = 4;
                    Token[NN].lline = ln;
                    NN++;
                    ind++;
                }
                else if ( ((ch >= 65) && (ch <= 90)) || ((ch >= 97) &&
                (ch <= 122)) || (ch == 95) )
                // if ch is A..Z or a..z or '_' it is identifier or reserved word
                {
                    zz = 0;
                    tmp[0] = ch;
                    nxt = instr[ind + 1];
                    zz++;
                    while ( ((nxt >= 65) && (nxt <= 90)) || ((nxt >= 97) &&
                    (nxt <= 122)) || (nxt == 95) || ((nxt >= 48) && (nxt <= 57)) )
                    // A..Z arasy, a..z arasy, '_', 0..9 arasy
                    {
                        ind++;
                        ch = instr[ind];
                        tmp[zz] = ch;
                        zz++;
                        nxt = instr[ind + 1];
                    }
                    tmp[zz] = 0;
                    if ( isReserved(tmp) == 0)
                    Token[NN].typ = 0; // keyword = reserved
                    else
                    Token[NN].typ = 2; // identifier
                    Token[NN].no = NN;
                    strcpy(Token[NN].ctkn, tmp);
                    Token[NN].lline = ln;
                    NN++;
                    ind++;
                }
                else if ( (ch >= 48) && (ch <= 57) )    // 0..9 : 'number'
                {
                    zz = 0;
                    tmp[0] = ch;
                    nxt = instr[ind + 1];
                    zz++;
                    while ( ((nxt >= 48) && (nxt <= 57)) )    // take all 0..9 numbers.
                    {
                        ind++;
                        ch = instr[ind];
                        tmp[zz] = ch;
                        zz++;
                        nxt = instr[ind + 1];
                    }
                    tmp[zz] = 0;
                    Token[NN].typ = 3; // constant
                    Token[NN].no = NN;
                    strcpy(Token[NN].ctkn, tmp);
                    Token[NN].lline = ln;
                    NN++;
                    ind++;
                }
                else // illegal charater
                {
                    Token[NN].no = NN;
                    Token[NN].ctkn[0] = ch;
                    Token[NN].ctkn[1] = 0;
                    Token[NN].typ = 5;
                    Token[NN].lline = ln;
                    NN++;
                    ind++;
                } // if
            } // if
        } // while
        ln++; // increase line number by one
    } // while

    fclose(f);

    f = fopen("Token.txt", "w");
    fprintf(f, "n  NO       LINE              TOKEN                              TIPn");
    fprintf(f, "__________________________________________________________________________n");
    for(i=0;i<NN;i++)
    {
        fprintf(f, " %4ld   %4ld    %30s       %20sn",
        Token[i].no, Token[i].lline, Token[i].ctkn, Typs[Token[i].typ].nm);
    }
    fclose(f);

}
	#include <stdlib.h>
	#include <stdio.h>
	#include <string.h>
	#include <conio.h>

	/* One fixed-width text entry: room for 19 characters plus the terminator. */
	struct t1tag {
	    char nm[20];
	};
	typedef struct t1tag t1;

	t1 Typs[] = {"keyword", "operator", "identifier", "number", "string", "illegal character",
	"assignment", "paranthesis", "end of series", "relational operator",
	"array start", "array end", "seperator", "Unary operator",
	"compiler directive", "converter", "range"};

	// Ini adalah beberapa kata-kata dicadangkan di Pascal.
	t1 Rsrv[] = {"BEGIN","END","IF","THEN","ELSE","DO","FOR","TO","WHILE","REPEAT",
	"UNTIL","PROCEDURE","FUNCTION","INTEGER","REAL","BYTE","CHAR",
	"TEXT", "ARRAY","VAR","RECORD","SET","TYPE","AND","CASE","CONST",
	"DIV", "DOWNTO","EXTERNAL","FILE","FORWARD","GOTO","IN","INLINE",
	"INTERRUPT", "LABEL","MOD","NIL","NOT","OF","OR","PACKED","PROGRAM",
	"WITH", "BOOLEAN","INPUT","OUTPUT", "WRITE", "WRITELN" };

	long s = 49;

	// These are the characters that start an operator or punctuation token.
	char oprtr[] = {
	    '.', ',', ';', '=', ':', '[', ']', '(', ')',
	    '-', '+', '/', '*', '>', '<', '$', '#', '@'
	};
	// Number of entries in oprtr; computed from the array so the two stay in sync.
	long t = (long)(sizeof oprtr / sizeof oprtr[0]);
	/* One scanned token as it is recorded in the token table. */
	struct ttyptg {
	    long no;       /* sequence number of the token */
	    char ctkn[50]; /* token text, NUL-terminated */
	    int typ;       /* token category code */
	    long lline;    /* number of the line the token was found on */
	};
	typedef struct ttyptg ttyp;

	/*
	 * Looks a word up in the Rsrv keyword table, ignoring letter case.
	 *
	 * tmp: NUL-terminated word to check; it is not modified.
	 *
	 * Returns 0 when the word IS a reserved word and 1 when it is not. The sense
	 * is the opposite of what the name suggests; callers test the result
	 * against 0.
	 */
	int isReserved(char tmp[])
	{
	    long i;

	    for (i = 0; i < s; i++)
	    {
	        const char *word = tmp;
	        const char *key = Rsrv[i].nm;

	        /* Rsrv entries are upper case, so only the candidate is folded.
	           Comparing in place needs no temporary copy, so nothing can leak. */
	        while (*word != '\0' && toupper((unsigned char)*word) == (unsigned char)*key)
	        {
	            word++;
	            key++;
	        }
	        if (*word == '\0' && *key == '\0')
	            return 0; // reserved word
	    }

	    /* The table is never indexed at i == s, unlike a post-loop re-check. */
	    return 1; // not a keyword
	}

	// RUN PROGRAM
	void main(void)
	{
	long i, j, ind, zz, N, NN, ln;
	char ch, nxt, instr[1024], tmp[100], FName[256];
	ttyp Token[1024];
	FILE *f;

	printf("Enter a Pascal File Name: n");
	scanf("%s", FName);

	f = fopen(FName,"r");

	ln = 0;
	NN = 0;
	while (!feof(f))
	{
	N = 0;
	ch = 0;
	while ( ('n' != (ch = getc(f))) && (!feof(f)) ) // end of line check
	{
	instr[N] = ch;
	N++;
	}
	instr[N] = 0;
	N++;

	// TOKEN construction starts here
	ind = 0;
	while (ind < (N-1)) //
	{
	ch = instr[ind];
	while (ch == 32) // space check
	{
	ind++;
	ch = instr[ind];
	}
	if ( ind < N ) // if whole line has spaces, wil not be gone in.
	{
	// first 'operator' control will be done.
	j = 0;
	while( (j < t) && (ch != oprtr[j]) )
	j++;
	if ( ch == oprtr[j] ) // so ch is an operant.
	{
	zz = 0;
	tmp[0] = ch;
	if ( ind < N )
	nxt = instr[ind + 1];
	if ((nxt == '>') \|\| (nxt == '=')) // can be one of "<>, <=, >=, :=" .
	{
	zz++;
	tmp[1] = nxt;
	ind +=2;
	if ( nxt == '>' )
	Token[NN].typ = 9; // relational operator
	}
	else if ( (ch == '.') && (nxt == '.'))
	{
	zz++;
	tmp[1] = nxt;
	ind +=2;
	Token[NN].typ = 16; // range
	}
	else
	ind++;
	switch (ch)
	{
	case ':': if (nxt == '=')
	Token[NN].typ = 6;
	else
	Token[NN].typ = 1;
	break;
	case '+':
	case '-':
	case '*':
	case '/': Token[NN].typ = 1; break; // operator
	case '.': if ( nxt != '.')
	Token[NN].typ = 1; break; // operator
	case ';': Token[NN].typ = 8; break; // end of series
	case ',': Token[NN].typ = 12; break; // seperator
	case '=':
	case '>':
	case '<': Token[NN].typ = 9; break; // relational oparator
	case '[': Token[NN].typ = 10; break; // array start
	case ']': Token[NN].typ = 11; break; // array end
	case '(':
	case ')': Token[NN].typ = 7; break; // paranthesis
	case '$': Token[NN].typ = 14; break; // compiler directive
	case '#': Token[NN].typ = 15; break; // ASCII converter
	case '@': Token[NN].typ = 13; break; // unary operator
	}
	zz++;
	tmp[zz] = 0;
	Token[NN].no = NN;
	strcpy(Token[NN].ctkn, tmp);
	Token[NN].lline = ln;
	NN++;
	}
	else if (ch == 39) // 39 = " ' " : String start
	{
	zz = 0;
	ind++;
	nxt = instr[ind];
	tmp[0] = nxt;
	while ( nxt != 39) // end of string
	{
	zz++;
	ind++;
	tmp[zz] = nxt = instr[ind];
	}
	tmp[zz] = 0;
	Token[NN].no = NN;
	strcpy(Token[NN].ctkn, tmp);
	Token[NN].typ = 4;
	Token[NN].lline = ln;
	NN++;
	ind++;
	}
	else if ( ((ch >= 65) && (ch <= 90)) \|\| ((ch >= 97) &&
	(ch <= 122)) \|\| (ch == 95) )
	// if ch is A..Z or a..z or '_' it is identifier or reserved word
	{
	zz = 0;
	tmp[0] = ch;
	nxt = instr[ind + 1];
	zz++;
	while ( ((nxt >= 65) && (nxt <= 90)) \|\| ((nxt >= 97) &&
	(nxt <= 122)) \|\| (nxt == 95) \|\| ((nxt >= 48) && (nxt <= 57)) )
	// A..Z arasy, a..z arasy, '_', 0..9 arasy
	{
	ind++;
	ch = instr[ind];
	tmp[zz] = ch;
	zz++;
	nxt = instr[ind + 1];
	}
	tmp[zz] = 0;
	if ( isReserved(tmp) == 0)
	Token[NN].typ = 0; // keyword = reserved
	else
	Token[NN].typ = 2; // identifier
	Token[NN].no = NN;
	strcpy(Token[NN].ctkn, tmp);
	Token[NN].lline = ln;
	NN++;
	ind++;
	}
	else if ( (ch >= 48) && (ch <= 57) ) // 0..9 : 'number'
	{
	zz = 0;
	tmp[0] = ch;
	nxt = instr[ind + 1];
	zz++;
	while ( ((nxt >= 48) && (nxt <= 57)) ) // take all 0..9 numbers.
	{
	ind++;
	ch = instr[ind];
	tmp[zz] = ch;
	zz++;
	nxt = instr[ind + 1];
	}
	tmp[zz] = 0;
	Token[NN].typ = 3; // constant
	Token[NN].no = NN;
	strcpy(Token[NN].ctkn, tmp);
	Token[NN].lline = ln;
	NN++;
	ind++;
	}
	else // illegal charater
	{
	Token[NN].no = NN;
	Token[NN].ctkn[0] = ch;
	Token[NN].ctkn[1] = 0;
	Token[NN].typ = 5;
	Token[NN].lline = ln;
	NN++;
	ind++;
	} // if
	} // if
	} // while
	ln++; // increase line number by one
	} // while

	fclose(f);

	f = fopen("Token.txt", "w");
	fprintf(f, "n NO LINE TOKEN TIPn");
	fprintf(f, "__________________________________________________________________________n");
	for(i=0;i<NN;i++)
	{
	fprintf(f, " %4ld %4ld %30s %20sn",
	Token[i].no, Token[i].lline, Token[i].ctkn, Typs[Token[i].typ].nm);
	}
	fclose(f);

	}