——实验题目
班级:计算机1507班 姓名:罗艺博
一、实验目标:
设计并实现词法分析扫描器:以类C语言的程序源文件作为输入,对其进行词法扫描,并以TOKEN类别码序列的形式输出扫描结果。
二、实验内容:
1.概要设计:
将程序大致分为:主函数、识别器(有限自动机state_change)、词法分析器(state_to_code)三大部分。
其中,主函数所完成的功能为:打开、读取、关闭文件(即C语言源程序),重置token串等。
识别器的功能为:识别字符,完成token串的生成,判断词法错误等。
词法分析器的功能为:生成token类别码,判断token是否出错等。
学号:201377
主函数、识别器、词法分析器关系图
2.流程图
词法分析扫描器流程图
3.关键函数
a. 识别器(有限自动机state_change)
int state_change(state,ch) //识别器,有限自动机 { if((ch == ' ') || (ch == '\') || (ch == '\\n')) //略去空格、TAB、换行 return 0;
else if((IsAlpha(ch) == 1) || (ch == '_')) //判断是否为字母或'_',从而判断是关键字、
标识符、CT字符还是ST字符串 {
if(state == 1) return 2; else if(state == 2) return 2; else if(state == 7) return 8; else if(state == 8) return 8;
else if(state == 10) return 11; else if(state == 11) return 11; else if(state == 6) //考虑与PT相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 2; } else
return -1; }
else if(IsNum(ch) == 1) //判断是否为数字,从而进一步判断是为IT,CT,KT {
if(state == 1) return 3; else if(state == 2) return 2; else if(state == 3) return 3; else if(state == 4) return 4; else if(state == 5) return 5; else if(state == 6) //考虑与PT相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 3; }
else
return -1; }
else if(ch == '.') //判断是否为CT中的小数点
{
if(state == 3) return 4; else
return -1; }
else if(ch == '\\'') {
if(state == 1) return 7;
else if(state == 6) {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 7; }
else if(state == 8) return 9; else
return -1; }
else if(ch == '\\\"') {
if(state == 1) return 10; else if(state == 6) {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 10; }
else if(state == 11) return 12; else
return -1; } else {
//判断是否为CT字符
//考虑与PT相连的情况
//判断是否为ST
//考虑与PT相连的情况
//判断是否为PT
if(state == 1) return 6; else if(state == 2) //考虑与IT,KT相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 6; }
else if(state == 3) //考虑与CT相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 6; }
else if(state == 9) //考虑与CT字符相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 6; }
else if(state == 12) //考虑与ST相连的情况 {
state_to_code(state_before,token); i = 0;
memset(token,0,10); return 6; }
else if(state == 6) {
if(((i == 1)&&(ch == '='))&&((token[0] == '>')||(token[0] == '<')||(token[0] == '=')))
{
return 6; } else
return 0; } else
return -1; } }
b. 词法分析器(state_to_code)
int state_to_code(state_before,token) {
int n; //循环 code1 = 0; code2 = 0; switch(state_before) { case {
if(strcmp(token, KT[n]) == 0) {
code1 = 1; code2 = n;
printf(\"<%d %d>\\n\code1,code2
break; } }
if(code1 == 0) {
printf(\"<2 0>\\n\"); }
break; case break; case 5: printf(\"<3 0>\\n\"); break; case 6: for(n=0; n<18; n++) {
if(strcmp(token, PT[n]) == 0) {
code1 = 6; code2 = n;
printf(\"<%d %d>\\n\//词法分析器 2://KT <1 X> for(n=0; n<6; n++) //why改变不了全局的//IT <2 0> 3://CT <3 0> printf(\"<3 0>\\n\"); //CT(小数) <3 0> //PT <6 X> //why改变不了全局的
code1,code2
break; } }
break; case 9: //CT字符 <4 0> printf(\"<4 0>\\n\"); break; case 12: printf(\"<5 0>\\n\"); break; default : printf(\"Sorry it's going wrong!\"); } return 0; }
c.判断函数
/* Return 1 when c is an ASCII letter (a-z or A-Z), otherwise 0. */
int IsAlpha(char c)
{
    int is_lower = (c >= 'a') && (c <= 'z');
    int is_upper = (c >= 'A') && (c <= 'Z');

    return (is_lower || is_upper) ? 1 : 0;
}
/* Return 1 when c is a decimal digit ('0'..'9'), otherwise 0. */
int IsNum(char c)
{
    if (c < '0' || c > '9') {
        return 0;
    }
    return 1;
}
/*
 * Check whether word is a keyword by looking it up in the table KT.
 *
 * Returns 2 when word equals KT[0], 1 when it equals one of KT[1]..KT[5],
 * and 0 when it matches no entry.
 */
int IsKey(char *word)
{
    int idx = 0;

    while (idx < 6) {
        if (strcmp(word, KT[idx]) == 0) {
            return (idx == 0) ? 2 : 1;
        }
        idx++;
    }
    return 0;
}
/*
 * Check whether token is a delimiter by looking it up in the table PT.
 *
 * Returns 2 when token equals PT[0], 1 when it equals one of PT[1]..PT[17],
 * and 0 when it matches no entry.
 */
int IsDelimiter(char *token)
{
    int idx = 0;

    while (idx < 18) {
        if (strcmp(token, PT[idx]) == 0) {
            return (idx == 0) ? 2 : 1;
        }
        idx++;
    }
    return 0;
}
源程序代码:(加入注释)
#include char word[10]; //存放识别出的单词流 char ch; int p; int i=0; //token下标 int state; int code2; int state_before; //存放之前状态 //存放状态标记 //存放当前读入字符 //input下标 //存放输入字符串 //存放构成单词符号的符号串 int code1; //token类别码,1->KT,2->IT,3->CT,4->CTT,5->ST,6->PT int num; //存放整形值 char *KT[6] = {\"int\ //1 char *PT[18] = {\">=\ //18 //char *IT[20] = {NULL}; //2 //char *CT[20] = {NULL}; //3 //char *CTT[20] = {\"\\0\ //4 //char *ST[20] = {NULL}; //5 int state_change(state, ch); int state_to_code(state_before, token); int IsAlpha(char c); int IsNum(char c); int IsKey(char *word); int IsDelimiter(char *token); int main() { state = 1; //state=1设为初始态,0设为结束态 if((fp = fopen(\"E:\\\\Cwork\\\\Analysis\\\est.txt\ { printf(\"Cannot open file.\\n\"); exit(1); } while((ch = fgetc(fp)) != '#') { { printf(\" Err! Stupid man!\\n\"); } token[i++] = ch; { state_to_code(state_before,token); state_before = state; state = state_change(state,ch); if(state == -1) else if (state != 0) { } else { if(state_before != 1) } } int state_change(state,ch) //识别器,有限自动机 { return 0; else if((IsAlpha(ch) == 1) || (ch == '_')) CT字符还是ST字符串 { if(state == 1) return 2; else if(state == 2) return 2; else if(state == 7) return 8; else if(state == 8) return 8; else if(state == 10) return 11; else if(state == 11) return 11; else if(state == 6) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 2; } else return -1; } else if(IsNum(ch) == 1) { //判断是否为数字,从而进一步判断是为IT,CT,KT //考虑与PT相连的情况 //判断是否为字母或'_',从而判断是关键字、标识符、 if((ch == ' ') || (ch == '\') || (ch == '\\n')) //略去空格、TAB、换行 fclose(fp); return 0; } memset(token,0,10); i = 0; state = 1; } if(state == 1) return 3; else if(state == 2) return 2; else if(state == 3) return 3; else if(state == 4) return 4; else if(state == 5) return 5; else if(state == 6) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 3; } else return -1; } else if(ch == '.') { if(state == 3) return 4; else return -1; } else if(ch == '\\'') { if(state == 1) return 7; else if(state == 6) { 
state_to_code(state_before,token); i = 0; memset(token,0,10); return 7; } else if(state == 8) return 9; else return -1; } else if(ch == '\\\"') //考虑与PT相连的情况 //判断是否为CT中的小数点 //判断是否为CT字符 //考虑与PT相连的情况 //判断是否为ST { if(state == 1) return 10; else if(state == 6) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 10; } else if(state == 11) return 12; else return -1; } else { if(state == 1) return 6; else if(state == 2) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 6; } else if(state == 3) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 6; } else if(state == 9) { state_to_code(state_before,token); i = 0; memset(token,0,10); return 6; } else if(state == 12) { state_to_code(state_before,token); i = 0; //考虑与PT相连的情况 //判断是否为PT //考虑与IT,KT相连的情况 //考虑与CT相连的情况 //考虑与CT字符相连的情况 //考虑与ST相连的情况 memset(token,0,10); return 6; } else if(state == 6) { if(((i == 1)&&(ch == '='))&&((token[0] == '>')||(token[0] == '<')||(token[0] == '='))) { return 6; } else return 0; } else return -1; } } int state_to_code(state_before,token) { int n; //循环 code1 = 0; code2 = 0; { if(strcmp(token, KT[n]) == 0) { code1 = 1; code2 = n; printf(\"<%d %d>\\n\ //why改变不了全局的code1,code2 break; } } if(code1 == 0) { printf(\"<2 0>\\n\"); } break; case 3: //CT <3 0> printf(\"<3 0>\\n\"); //IT <2 0> switch(state_before) { case 2: //KT <1 X> for(n=0; n<6; n++) //词法分析器 break; case 5: printf(\"<3 0>\\n\"); break; case 6: for(n=0; n<18; n++) { if(strcmp(token, PT[n]) == 0) { code1 = 6; code2 = n; printf(\"<%d %d>\\n\ //why改变不了全局的code1,code2 break; } } break; case 9: printf(\"<4 0>\\n\"); break; case 12: printf(\"<5 0>\\n\"); break; default : printf(\"Sorry it's going wrong!\"); } int IsAlpha(char c) { if(((c>='a')&&(c<='z')) || ((c>='A')&&(c<='Z'))) return 1; else return 0; } int IsNum(char c) { if(c>='0'&&c<='9') return 1; else return 0; } //判断是否为数字 //判断是否为字母 } return 0; //不被任何词识别 //ST <5 0> //CT字符 <4 0> //PT <6 X> //CT(小数) <3 0> int IsKey(char *word) { int m,n; 
for(n=0; n<6; n++) { if((m = strcmp(word, KT[n])) == 0) { if(n == 0) return 2; return 1; } } return 0; } int IsDelimiter(char *token) { int m,n; for(n=0; n<18; n++) { if((m = strcmp(token, PT[n])) == 0) { if(n == 0) return 2; return 1; } } return 0; } 程序运行结果:(截屏) 输入: //判断是否为关键字 //判断是否为界符 输出: 因篇幅问题不能全部显示,请点此查看更多更全内容
Copyright © 2019- azee.cn 版权所有 赣ICP备2024042794号-5
违法及侵权请联系:TEL:199 1889 7713 E-MAIL:2724546146@qq.com
本站由北京市万商天勤律师事务所王兴未律师提供法律服务