Bootstrap

编译原理实验(一)

实验一 词法分析

一、 实验目的

编制一个读单词过程,从输入的源程序中,识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。并依次输出各个单词的内部编码及单词符号自身值。

二、实验题目

如源程序为C语言。输入如下一段:

main()
{
    int a=-5,b=4,j;
    if(a>=b)
        j=a-b; 
    else 
        j=b-a;
}

要求输出如下

(2,”main”) (5,”(”)   (5,”)”)
(5,”{”)    (1,”int”)   (2,”a”)
(4,”=”)   (3,”-5”)   (5,”,”)
(2,”b”)   (4,”=”)    (3,”4”)
(5,”,”)    (2,”j”)    (5,”;”)
(1,”if”)   (5,”(”)   (2,”a”)
(4,”>=”)  (2,”b”)    (5,”)”)
(2,”j”)    (4,”=”)    (2,”a”)
(4,”-”)    (2,”b”)    (5,”;”)
(1,”else”) (2,”j”)     (4,”=”)
(2,”b”)   (4,”-”)    (2,”a”)
(5,”;”)    (5,”}”)

三、实验理论依据

识别各种单词符号

  • 程序语言的单词符号一般分为五种:

    1. 关键字(保留字/ 基本字)if 、while 、begin…
    2. 标识符:常量名、变量名…
    3. 常数:34 、56.78 、true 、‘a’ 、…
    4. 运算符:+ 、- 、* 、/ 、< 、and 、or 、…
    5. 界限符:, ; ( ) { } /*…
  • 识别单词:掌握单词的构成规则很重要

    1. 标识符的识别:(字母|下划线)(字母|数字|下划线)*,即以字母或下划线开头,后接任意个字母、数字或下划线
    2. 关键字的识别:与标识符相同,最后查表
    3. 常数的识别
    4. 界符和算符的识别
  • 大多数程序设计语言的单词符号都可以用转换图来识别

    如图

    [图片缺失:识别单词符号的状态转换图(原文件 1.png 未能上传)]

  • 词法分析器输出的单词符号常常表示为二元式

    即单词种别、单词符号的属性值

    1. 单词种别通常用整数编码,如1 代表关键字,2 代表标识符等
    2. 关键字可视其全体为一种,也可以一字一种。采用一字一种的分法实际处理起来较为方便。
    3. 标识符一般统归为一种
    4. 常数按类型(整、实、布尔等)分种
    5. 运算符可采用一符一种的方法。
    6. 界符一般一符一种的分法。

超前搜索法

词法分析时,常常会用到超前搜索方法。

如当前待分析字符串为“a>+” ,当前字符为“>” ,此时,分析器到底是将其分析为大于关系运算符还是大于等于关系运算符呢?

显然,只有知道下一个字符是什么才能下结论。于是分析器读入下一个字符’+’ ,这时可知应将’>’ 解释为大于运算符。但此时,超前读了一个字符’+’ ,所以要回退一个字符,词法分析器才能正常运行。

预处理

预处理工作包括对空白符、跳格符、回车符和换行符等编辑性字符的处理,及删除注解等。由一个预处理子程序来完成。

四、词法分析器的设计

设计方法:

  1. 写出该语言的词法规则。
  2. 把词法规则转换为相应的状态转换图。
  3. 把各转换图的初态连在一起,构成识别该语言的自动机
  4. 设计扫描器

把扫描器作为语法分析的一个过程,当语法分析需要一个单词时,就调用扫描器。 扫描器从初态出发,当识别一个单词后便进入终态,送出二元式。

五、编码

增加内容总结

  1. 增加了更多关键字,关键字列表如下

    /* Reserved-word table, identical to the definition in key.h. The position
     * of a word in this table is the number printed after "1.Key Words". */
    const char *key[34] = {"if", "else", "for", "while", "do", "return", "break", "continue", "auto", "double",
                           "int", "struct", "long", "switch", "case", "File", "register", "typedef", "char", "extern",
                           "union", "const", "float", "short", "unsigned", "void", "default", "sizeof",
                           "main", "static", "switch", "continue", "float"
    };
    
  2. 增加对小数的处理

main.cpp ,读取输入内容并逐个字符读取以进行分析

#include "immintrin.h"
#include<cstdio>
#include <cctype>
#include <fstream>
#include "iostream"
#include "for_digit.h"
#include "for_vocabulary.h"
#include "special.h"

/*
 * Lexer driver: opens the source file and feeds it, one look-ahead character
 * at a time, to the scanner routines.
 *
 * The file to scan is argv[1] when an argument is given; otherwise the
 * original hard-coded path is used. Returns 1 if the file cannot be opened,
 * 0 after the whole input has been processed.
 */
int main(int argc, char *argv[]) {
    const char *path = (argc > 1) ? argv[1] : R"(C:\Users\Lunatic\Desktop\test.c)";

    fp = fopen(path, "r");
    if (fp == nullptr) {
        perror(path);
        return 1;
    }

    buffer = (char) fgetc(fp);
    /* Every scanner hands the look-ahead back as a char, so the end-of-file
     * marker is compared after the same narrowing; this also terminates on
     * platforms where plain char is unsigned. */
    while (buffer != (char) EOF) {
        /* <cctype> functions require a value representable as unsigned char. */
        const unsigned char current = (unsigned char) buffer;

        if (isspace(current)) {
            /* whitespace (including '\r' from CRLF files) carries no token */
            buffer = (char) fgetc(fp);
        } else if (isalpha(current) || buffer == '_') {
            /* identifiers may start with a letter or an underscore */
            buffer = vocabularyProcess(buffer);
        } else if (isdigit(current)) {
            buffer = digitProcess(buffer);
        } else {
            buffer = special_char(buffer);
        }
    }

    fclose(fp);
    return 0;
}

key.h 定义保留字

//
// Created by Lunatic on 2021/5/12.
//

#ifndef T1_KEY_H
#define T1_KEY_H

/* Input stream being scanned; opened in main() and read by the scanner routines. */
FILE *fp;
/* Current look-ahead character used by main()'s dispatch loop. */
char buffer;
/*
 * Reserved-word table searched by checkType(). The index of a word is the
 * number printed after "1.Key Words". Lookups return the first match, so the
 * trailing duplicates ("switch", "continue", "float") are never reported;
 * they are kept so the table length and all existing indices stay unchanged.
 * The elements are const because string literals must not be modified (and
 * binding them to plain char * is ill-formed since C++11).
 */
const char *key[34] = {"if", "else", "for", "while", "do", "return", "break", "continue", "auto", "double",
                       "int", "struct", "long", "switch", "case", "File", "register", "typedef", "char", "extern",
                       "union", "const", "float", "short", "unsigned", "void", "default", "sizeof",
                       "main", "static", "switch", "continue", "float"
};
/*
 * id: class of the most recently scanned token (1 keyword, 2 identifier,
 * 3 number, 4 operator). special_char() treats '+'/'-' directly followed by a
 * digit as a sign only while id == 4, hence the initial value 4.
 * atype: not referenced by the scanner code shown alongside this header.
 */
[[maybe_unused]] int atype, id = 4;

#endif //T1_KEY_H

for_vocabulary.h 处理单词

//
// Created by Lunatic on 2021/5/12.
//

#ifndef T1_FOR_VOCABULARY_H
#define T1_FOR_VOCABULARY_H

#include "check.h"

char vocabularyProcess(char buffer) {
    int position;   /*保留字数组中的位置*/
    int i = -1;
    char words[20];
    while ((isalpha(buffer)) || (isdigit(buffer)) || buffer == '_') {
        /*读一个完整的单词放入alphatp数组中*/
        words[++i] = buffer;
        buffer = fgetc(fp);
    }
    words[i + 1] = '\0';
    position = checkType(words, 1); // check the type of the words
    if (position != 0) {
        printf("%s, (1.Key Words,%d)\n", words, position - 1);
        id = 1;
    } else {
        printf("(%s ,2.Identifier)\n", words);
        id = 2;
    }
    return (buffer);
}

#endif //T1_FOR_VOCABULARY_H

check.h 确定单词是保留字还是标识符

//
// Created by Lunatic on 2021/5/12.
//

#ifndef T1_CHECK_H
#define T1_CHECK_H

/* Token classes: 1 keyword, 2 identifier, 3 constant, 4 operator, 5 delimiter */
/*
 * Decides whether `text` is a reserved word.
 *
 * Only type == 1 (keyword lookup) is implemented. For that type the function
 * returns the 1-based position of the first entry of the global `key` table
 * equal to `text`, or 0 when there is none. For any other `type` it returns 0;
 * previously control fell off the end of the function without a return value.
 *
 * The scan covers the initialised part of `key`: it stops at the array's end
 * or at the first null entry, so the bound follows the table instead of a
 * hard-coded index.
 */
int checkType(const char *text, int type) {
    if (type != 1) {
        return 0;
    }

    const size_t capacity = sizeof key / sizeof key[0];
    for (size_t i = 0; i < capacity && key[i] != NULL; ++i) {
        if (strcmp(key[i], text) == 0) {
            return (int) i + 1; /* non-zero marks a keyword; callers subtract 1 */
        }
    }
    return 0;
}

#endif //T1_CHECK_H

for_digit.h 读取数字

//
// Created by Lunatic on 2021/5/12.
//

#ifndef T1_FOR_DIGIT_H
#define T1_FOR_DIGIT_H

#include "cstdio"
#include<cstring>
#include <cctype>
#include "key.h"

char digitProcess(char buffer) {
    int i = -1;
    char digittp[20];
    while ((isdigit(buffer)) || buffer == '.') { // int , float
        digittp[++i] = buffer;
        buffer = fgetc(fp);
    }
    digittp[i + 1] = '\0';
    printf("(%s ,3.Number)\n", digittp);
    id = 3;
    return (buffer);
}

#endif //T1_FOR_DIGIT_H

special.h 其他字符处理,包括界符、操作符、小数

//
// Created by Lunatic on 2021/5/16.
//

#ifndef T1_SPECIAL_H
#define T1_SPECIAL_H

#include "special_c/get_next.h"

// Echoes a block comment to stdout as one "5.Boundary operator" entry. The
// opening slash-star has already been consumed by the caller. Characters are
// streamed rather than buffered, so a comment of any length is safe, and the
// comment ends only at a real star-slash pair.
// Returns whatever get_next_char(fp) yields after the comment, or (char) EOF
// when the input ends before the comment is closed.
static char special_block_comment(void) {
    int prev = 0;
    int cur;

    fputs("(/*", stdout);
    while ((cur = fgetc(fp)) != EOF) {
        putchar(cur);
        if (prev == '*' && cur == '/') {
            fputs(" ,5.Boundary operator)\n", stdout);
            return get_next_char(fp);
        }
        prev = cur;
    }
    fputs(" ,5.Boundary operator)\n", stdout);
    return (char) EOF;
}

// Echoes a line comment (both slashes already consumed) up to, but not
// including, the terminating newline, as one "5.Boundary operator" entry.
// Returns whatever get_next_char(fp) yields after the newline, or (char) EOF
// when the input ends inside the comment.
static char special_line_comment(void) {
    int cur;

    fputs("(//", stdout);
    while ((cur = fgetc(fp)) != EOF && cur != '\n') {
        putchar(cur);
    }
    fputs(" ,5.Boundary operator)\n", stdout);
    return (cur == EOF) ? (char) EOF : get_next_char(fp);
}

/*
 * Scans one token that begins with `c`, a character that is neither a letter,
 * a digit nor whitespace: delimiters, operators, comments, printf-style
 * format specifiers and signed numbers. The token is printed to stdout and
 * the global `id` records the class of the last token (3 number, 4 operator).
 *
 * Return value: the next character main() should examine. When the token was
 * consumed completely this is obtained from get_next_char(fp), as in the
 * original code (that helper lives in special_c/get_next.h and is assumed to
 * return the following input character - confirm there). When one character
 * of look-ahead had to be read to delimit the token, that character itself is
 * returned so that it is not lost.
 *
 * Every path now returns a value and every printed operator is
 * NUL-terminated.
 */
char special_char(char c) {
    char ch[3] = {c, '\0', '\0'}; /* operator text: at most two characters */
    int next;                     /* look-ahead; int keeps EOF distinguishable */

    /* delimiters */
    if (c == ',' || c == ';' || c == '{' || c == '}' || c == '(' || c == ')' || c == '[' || c == ']') {
        printf("(%c ,5.Boundary operator)\n", c);
        return get_next_char(fp);
    }

    /* string quote: only a directly following format specifier is reported */
    if (c == '"') {
        next = fgetc(fp);
        if (next == '%') {
            const int conversion = fgetc(fp);
            if (conversion == EOF) {
                return (char) EOF;
            }
            ch[0] = '%';
            ch[1] = (char) conversion;
            printf("(%s,4.Operator)\n", ch);
            return get_next_char(fp);
        }
        /* the quote itself produces no token; hand the look-ahead back */
        return (char) next;
    }

    /* '*' and '%', optionally followed by '=' */
    if (c == '*' || c == '%') {
        next = fgetc(fp);
        id = 4;
        if (next == '=') {
            ch[1] = '=';
            printf("(%s,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* '/': division, "/=", or the start of a comment */
    if (c == '/') {
        next = fgetc(fp);
        if (next == '*') {
            return special_block_comment();
        }
        if (next == '/') {
            return special_line_comment();
        }
        id = 4;
        if (next == '=') {
            ch[1] = '=';
            printf("(%s,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* assignment / relational operators, optionally followed by '=' */
    if (c == '=' || c == '!' || c == '<' || c == '>') {
        next = fgetc(fp);
        id = 4;
        if (next == '=') {
            ch[1] = '=';
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* '+' / '-': sign of a number, "++"/"--", "+="/"-=", or a plain operator */
    if (c == '+' || c == '-') {
        next = fgetc(fp);

        /* Directly after an operator, a sign followed by a digit starts a
         * signed number. */
        if (id == 4 && isdigit(next)) {
            char number[64];
            size_t length = 0;

            number[length++] = c;
            while (isdigit(next) || next == '.') {
                if (length < sizeof number - 1) {
                    number[length++] = (char) next;
                }
                next = fgetc(fp);
            }
            number[length] = '\0';
            printf("(%s ,3.Number)\n", number);
            /* a following '-' must be read as subtraction, so the class is 3 */
            id = 3;
            return (char) next;
        }

        if (next == '=') {
            ch[1] = '=';
            if (c == '+') {
                printf("(%s,4.Operators )\n", ch);
            } else {
                printf("(%s,4. Operators )\n", ch);
            }
            id = 4;
            return get_next_char(fp);
        }
        if (next == c) {
            /* "++" or "--"; id is left alone so that "i++ - 1" still parses
             * the '-' as subtraction */
            ch[1] = c;
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        id = 4;
        return (char) next;
    }

    /* bitwise / logical operators: "&&", "||", "&=", "|=", "^=" or single */
    if (c == '&' || c == '^' || c == '|') {
        next = fgetc(fp);
        id = 4;
        if (next == '=' || (next == c && c != '^')) {
            ch[1] = (char) next;
            printf("(%s ,4.Operators )\n", ch);
            return get_next_char(fp);
        }
        printf("(%s ,4.Operators )\n", ch);
        return (char) next;
    }

    /* Anything else (e.g. '#', '.', '~', '?', ':') is not tokenised yet: it is
     * reported on stderr and skipped so the driver loop keeps making progress. */
    fprintf(stderr, "unrecognized character 0x%02X skipped\n", (unsigned) (unsigned char) c);
    return get_next_char(fp);
}

#endif //T1_SPECIAL_H

增加内容总结

  1. 全部的保留字、运算符、分隔符
  2. 头文件语句中单词的分析
  3. scanf、printf 语句中各类单词的分析
  4. 小数,正负多位数
  5. 结构体

六、测试与运行

测试程序 test.c

int t;
int main() {
    // keywords
    a+-
    File *fp;
    register int  a=-5.455.5 =4,j;
    char b;
    if(a!=b)
        j=a-b;
    else  j=b-a;
    for (int i = 0; i < MAX_SIZE; i++) {
        continue;
        break;
    }
    /*
     * numbers 
     */
    int  c = 1.1; 
    // operators
    d = b && c || f;
    // test

    if (a > b) {
        do {
            b = b+1;
        } while (a = b);
        float e = b;
        printf("%f",e);
    }
    return 0;
}

void test(ch) {
    break;
}

typedef struct node{
    static int data[20];
    const short top;
} SqStack, node;

识别结果过长,结果仅粘贴文本内容及部分截图

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-2lsJ2cdR-1636038897515)(C:\Users\Lunatic\Desktop\编译原理实验\实验一\1.jpg)]

int, (1.Key Words,10)
(t ,2.Identifier)
(; ,5.Boundary operator)
int, (1.Key Words,10)
main, (1.Key Words,28)
(( ,5.Boundary operator)
() ,5.Boundary operator)
({ ,5.Boundary operator)
(// keywords ,5.Boundary operator)
(a ,2.Identifier)
(+ ,4.Operators )
File, (1.Key Words,15)
(fp ,2.Identifier)
(; ,5.Boundary operator)
register, (1.Key Words,16)
int, (1.Key Words,10)
(a ,2.Identifier)
(= ,4.Operators )
(-5.455.5 ,3.Number)
(, ,5.Boundary operator)
(=5.455.5 ,4.Operators )
(4 ,3.Number)
(, ,5.Boundary operator)
(j ,2.Identifier)
(; ,5.Boundary operator)
char, (1.Key Words,18)
(b ,2.Identifier)
(; ,5.Boundary operator)
if, (1.Key Words,0)
(( ,5.Boundary operator)
(a ,2.Identifier)
(!= ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
(j ,2.Identifier)
(= ,4.Operators )
(a ,2.Identifier)
(- ,4.Operators )
(b ,2.Identifier)
(; ,5.Boundary operator)
else, (1.Key Words,1)
(j ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(- ,4.Operators )
(a ,2.Identifier)
(; ,5.Boundary operator)
for, (1.Key Words,2)
(( ,5.Boundary operator)
int, (1.Key Words,10)
(i ,2.Identifier)
(= ,4.Operators )
(0 ,3.Number)
(; ,5.Boundary operator)
(i ,2.Identifier)
(< ,4.Operators )
(MAX_SIZE ,2.Identifier)
(; ,5.Boundary operator)
(i ,2.Identifier)
(+ ,4.Operators )
() ,5.Boundary operator)
({ ,5.Boundary operator)
continue, (1.Key Words,7)
(; ,5.Boundary operator)
break, (1.Key Words,6)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
(/*
     * numbers
     */ ,5.Boundary operator)
int, (1.Key Words,10)
(c ,2.Identifier)
(= ,4.Operators )
(1.1 ,3.Number)
(; ,5.Boundary operator)
(// operators ,5.Boundary operator)
(d ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(&& ,4.Operators )
(c ,2.Identifier)
(|| ,4.Operators )
(f ,2.Identifier)
(; ,5.Boundary operator)
(// test ,5.Boundary operator)
if, (1.Key Words,0)
(( ,5.Boundary operator)
(a ,2.Identifier)
(> ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
({ ,5.Boundary operator)
do, (1.Key Words,4)
({ ,5.Boundary operator)
(b ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(+ ,4.Operators )
(; ,5.Boundary operator)
(} ,5.Boundary operator)
while, (1.Key Words,3)
(( ,5.Boundary operator)
(a ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
() ,5.Boundary operator)
(; ,5.Boundary operator)
float, (1.Key Words,22)
(e ,2.Identifier)
(= ,4.Operators )
(b ,2.Identifier)
(; ,5.Boundary operator)
(printf ,2.Identifier)
(( ,5.Boundary operator)
(%f,4.Operator)
(, ,5.Boundary operator)
(e ,2.Identifier)
() ,5.Boundary operator)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
return, (1.Key Words,5)
(0 ,3.Number)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
void, (1.Key Words,25)
(test ,2.Identifier)
(( ,5.Boundary operator)
(ch ,2.Identifier)
() ,5.Boundary operator)
({ ,5.Boundary operator)
break, (1.Key Words,6)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
typedef, (1.Key Words,17)
struct, (1.Key Words,11)
(node ,2.Identifier)
({ ,5.Boundary operator)
static, (1.Key Words,29)
int, (1.Key Words,10)
(data ,2.Identifier)
([ ,5.Boundary operator)
(20 ,3.Number)
(] ,5.Boundary operator)
(; ,5.Boundary operator)
const, (1.Key Words,21)
short, (1.Key Words,23)
(top ,2.Identifier)
(; ,5.Boundary operator)
(} ,5.Boundary operator)
(SqStack ,2.Identifier)
(, ,5.Boundary operator)
(node ,2.Identifier)
(; ,5.Boundary operator)
;