编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

 

 

 

 

 

单词”分类说明 

标识符(Identifier):变量名和函数名(字母或下划线开头);

关键字(Keyword):系统保留字;

运算符(Operator): + - * / % = == != < <= > >= 等;

分隔符(Separator): , ; . ' " ( ) [ ] { } // /* */ #等;

常量(Constant): 字符串或字符常量;

注释(Note): 注释不参与编译。

 

编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

 

编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

 

程序清单:

/*

     ========================================================

                        词法分析器V1.0

         (DFA,C语言描述,可分析C/C++词法)

                       Author:Estrong

                          2012.05.10 

    ========================================================

*/

 

/*

说明:

    标识符(Identifier):变量名和函数名(字母或下划线开头);

    关键字(Keyword):具有特定的功能系统保留字,不能移作他用;

    运算符(Operator):   + - * / % = == != < <= > >= 等;

    分隔符(Separator):  , ; . ' " ( ) [ ] { } // /米 米/ #等;

    常量(Constant):字符串或字符常量;

    注释(Note): 注释不参与编译。

*/

#include "stdio.h"

#include "string.h"

 

#define MAX_STR 256

#define MAX_KEYWORDS 62

#define MAX_OPERATORS 12

#define MAX_SEPARATORS 15

 

/* 定义分析状态 */

#define STA_START 1

#define STA_IDorKEYWORD 2 /*   IDENTIFIER: 标识符   */

#define STA_NUMBER 3      /*   NUMBER:     数字     */

#define STA_NOTE 4        /*   NOTE:        注释     */

#define STA_CONSTANT 5    /*   CONSTANT:    常量     */

#define STA_DONE  6       /*   DONE:       完成     */

 

/* 定义所属类型 */

#define TYPE_KEYWORD 1    /*   KEYWORD:    保留字   */

#define TYPE_IDENTIFIER 2 /*   IDENTIFIER: 标识符   */

#define TYPE_NUMBER 3     /*   NUMBER:     数字     */

#define TYPE_NOTE 4       /*   NOTE:        注释     */

#define TYPE_CONSTANT 5   /*   CONSTANT:    常量     */

#define TYPE_OPERATOR 6   /*   OPERATOR:    运算符   */

#define TYPE_SEPARATOR 7  /*   SEPARATOR:   分隔符   */

#define TYPE_ERROR 8      /*   ERROR:       错误     */

#define TYPE_UNKNOWN 9    /*   UNKNOWN:     未知     */

#define TYPE_ENDFILE 10   /*   ENDFILE:     文件结束 */

 

char *Operators[MAX_OPERATORS] = {"+","-","*","/","%","=","==","!=","<","<=",">",">="};

char *Separators[MAX_SEPARATORS] ={",",";",".","'",""","(",")","[","]","{","}","//","/*","*/","#"};

char *Keywords[MAX_KEYWORDS] = {"include","define","auto","bool","break","case","catch","char","class",

                                "const","const_cast","continue","default","delete","do","double",

                                "dynamic_cast","else","enum","explicit","extern","false","float","for",

                                "friend","goto","if","inline","int","long","mutable","namespace","new",

                                "operator","private","protected","public","register","reinterpret_cast",

                                "return","short","signed","sizeof","static","static_cast","struct",

                                "switch","template","this","throw","true","try","typedef","typeid",

                                "typename","union","unsigned","using","virtual","void","volatile","while"};

 

/* 是否为运算符 */

int IsOperator(char c)

{

    int i;

    for(i=0;i<MAX_OPERATORS;i++)

        if(Operators[i][0]==c)

            return 1;

    return 0;

}

 

/* 是否为分隔符 */

int IsSeparator(char c)

{

    int i;

    for(i=0;i<MAX_SEPARATORS;i++)

        if(Separators[i][0]==c)

            return 1;

    return 0;

}

 

/* 是否为保留字 */

int IsKeyword(char *str)

{

    int i;

    for(i=0;i<MAX_KEYWORDS;i++)

        if(strcmp(Keywords[i],str)==0)

            return 1;

    return 0;

}

 

/* DONE一次输出一次 */

void OutputOneDone(FILE *outf,int type,char *str)

{

    if(IsKeyword(str)==1) type=TYPE_KEYWORD;

    switch(type)

    {

        case TYPE_KEYWORD:    fprintf(outf,"       KEYWORD:     ");break;

        case TYPE_IDENTIFIER: fprintf(outf,"       IDENTIFIER:  ");break;

        case TYPE_NUMBER:     fprintf(outf,"       NUMBER:      ");break;

        case TYPE_NOTE:       fprintf(outf,"       NOTE:        ");break;

        case TYPE_CONSTANT:   fprintf(outf,"       CONSTANT:    ");break;

        case TYPE_OPERATOR:   fprintf(outf,"       OPERATOR:    ");break;

        case TYPE_SEPARATOR:  fprintf(outf,"       SEPARATOR:   ");break;

        case TYPE_ERROR:      fprintf(outf,"       ERROR:       ");break;

        case TYPE_UNKNOWN:    fprintf(outf,"       UNKNOWN:     ");break;

        default:break;

    }

    fprintf(outf,"%sn",str);

}

 

/* DFA词法分析函数 */

void LexAnalyse(FILE *inf,FILE *outf)

{

   char c;

   char str[MAX_STR];/* 过程字符串 */

   int i;

   int line_no=1;/* 行号 */

   int state,type;

   char flag_limit_one_line;/* 标志为 / * 注释 范围是一行 */

   char flag_had_got_dot;/* 用于限定小数中只能有一个小数点 */

   fprintf(outf,"Line %d--------------------------------------n",line_no);

   while(!feof(inf))

   {

        i=0;

        state=STA_START;

        flag_limit_one_line=0;

        flag_had_got_dot=0;

        while(state!=STA_DONE)

        {

            c=fgetc(inf);

            switch(state)

            {

                case STA_START:

                                if( c==' ' || c=='t');

                                else if(c=='n')

                                {

                                    line_no++;

                                    fprintf(outf,"Line %d--------------------------------------n",line_no);

                                }

                                else if( (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                {

                                    state=STA_IDorKEYWORD;

                                    type=TYPE_IDENTIFIER;

                                    str[i]=c; i++;

                                }

                                else if(c>='0' && c<='9')

                                {

                                    state=STA_NUMBER;

                                    type=TYPE_NUMBER;

                                    str[i]=c; i++;

                                }

                                else if(c=='/')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(c=='/')   /*   //注释    */

                                    {

                                        flag_limit_one_line=1;

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

 

                                    }

                                    else if(c=='*')       /*   / * 注释开始    */

                                    {

                                        state=STA_NOTE;

                                        type=TYPE_NOTE;

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        state=STA_DONE;

                                        type=TYPE_OPERATOR;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]=''; */

                                    }

                                }

                                else if(c=='<' || c=='>')

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')

                                    {

                                        str[1]='=';i=2;/* str[2]=''    */

                                    }

                                    else/* 暂未考虑<<,>>位移运算符的识别 */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]=''; */

                                    }

                                }

                                else if(c=='!')

                                {

                                    state=STA_DONE;

                                    str[0]=c;

                                    c=fgetc(inf);

                                    if(c=='=')  /*    是!= */

                                    {

                                        type=TYPE_OPERATOR;

                                        str[1]='=';i=2;/* str[2]=''    */

                                    }

                                    else        /*    非!= */

                                    {

                                        type=TYPE_UNKNOWN;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        i=1;/* str[1]=''; */

                                    }

                                }

                                else if(c=='"' || c==''')

                                {

                                    state=STA_CONSTANT;

                                    type=TYPE_CONSTANT;

                                    str[0]=c; i=1;

                                }

                                else if(IsOperator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_OPERATOR;

                                    str[0]=c; i=1;/* str[1]=''; */

                                }

                                else if(IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    type=TYPE_SEPARATOR;

                                    str[0]=c; i=1;/* str[1]=''; */

                                }

                                else if(c==EOF)

                                {

                                    state=STA_DONE;

                                    type=TYPE_ENDFILE;

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_UNKNOWN;

                                }

                                break;/* case STA */

                case STA_IDorKEYWORD:

                                if((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_' )

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if((c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_IDENTIFIER;     */

                                    }

                                    else if(c==' ' || c=='t' || c=='n' || IsOperator(c) || IsSeparator(c))

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 标记为ERROR类型 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                        type=TYPE_ERROR;

                                    }

                                }

                                else if(c==' ' || c=='t' || c=='n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                }

                                else

                                {

                                    state=STA_DONE;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NUMBER:

                                if(c>='0' && c<='9')

                                {

                                    str[i]=c; i++;

                                }

                                else if(c=='.')   /* 小数识别  */

                                {

                                    str[i]=c; i++;

                                    c=fgetc(inf);

                                    if(flag_had_got_dot==0)

                                    {

                                    if(c>='0' && c<='9')

                                    {

                                        str[i]=c; i++;

                                        /* type=TYPE_NUMBER; */

                                        flag_had_got_dot=1;

                                    }

                                    else /* if(c==' ' || c=='t' || c=='n' || c>='0' && c<='9' || ...) */

                                    {

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        state=STA_DONE;

                                        type=TYPE_ERROR;         /*  如 date. 或 date.1 标记为ERROR类型 */

                                    }

                                    }

                                    else

                                    {

                                        type=TYPE_ERROR;

                                        str[i]=c; i++;

                                    }

                                }

                                else if(c==' ' || c=='t' || c=='n' || IsOperator(c) || IsSeparator(c))

                                {

                                    state=STA_DONE;

                                    fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                }

                                else

                                {

                                    str[i]=c; i++;

                                    type=TYPE_ERROR;

                                }

                                break;/* case STA */

                case STA_NOTE:

                                if(flag_limit_one_line==1)   /*      是/ * 注释, 限定一行 */

                                {

                                    if(c=='n')

                                    {

                                        state=STA_DONE;

                                        fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节 */

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                else /*      是  / *   注释 */

                                {

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='*')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        if(c=='/')

                                        {

                                            state=STA_DONE;

                                            str[i]=c; i++;

                                        }

                                        else

                                        {

                                            if(feof(inf))

                                            {

                                                state=STA_DONE;

                                                type=TYPE_ERROR;

                                            }

                                            fseek(inf, -1, SEEK_CUR);/* 文件流指针前移1个字节  */

                                        }

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                }

                                break;/* case STA */

                case STA_CONSTANT:

                                    if(feof(inf))

                                    {

                                        state=STA_DONE;

                                        type=TYPE_ERROR;

                                    }

                                    else if(c=='n')

                                    {

                                        line_no++;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='"' || c==''')

                                    {

                                        state=STA_DONE;

                                        str[i]=c; i++;

                                    }

                                    else if(c=='\')

                                    {

                                        str[i]=c; i++;

                                        c=fgetc(inf);

                                        str[i]=c; i++;

                                    }

                                    else

                                    {

                                        str[i]=c; i++;

                                    }

                                    break;/* case STA */

                case STA_DONE:  break;/* case STA */

                default:        break;/* case STA */

            }

        }/* state=STA_DONE */

        str[i]='';

        OutputOneDone(outf,type,str);/* DONE一次输出一次 */

   }/* feof(inf) */

}

 

/* 主函数 */

main()

{

    FILE *input,*output;

    if((input=fopen("input.c","r"))==NULL)

    {

        printf("Cannot find the file!nStrike any key to exit!n");

        system("pause");

        exit(1);

    }

    else

    {

        output=fopen("output.c","w");

        LexAnalyse(input,output);

        fprintf(output,"----------------------------------------------END OF FILE!n");

        fclose(input);

        fclose(output);

        printf("Lexical Analyzer has finished the analyzation!nFor more information please see the file output.txt.n");

        system("pause");

    }

}

 

 

原文链接: https://www.cnblogs.com/Estrong/archive/2012/11/05/2744548.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

    编译原理-词法分析器(DFA,C语言描述,可分析C/C++词法)

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/68142

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月9日 下午1:12
下一篇 2023年2月9日 下午1:12

相关推荐