我最近正在学编译原理,这里有一个用C语言实现的词法分析程序,识别的是TEST语言的单词,不知道是否可用。
十余年品牌的成都网站建设公司,1000多家企业网站设计经验.价格合理,可准确把握网页设计诉求.提供定制网站建设、商城网站开发、成都微信小程序、响应式网站开发等服务,我们设计的作品屡获殊荣,是您值得信赖的专业网络公司。
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/* Number of reserved words in the table below. */
#define keywordSum 8
/* Reserved words of the TEST language. MUST stay sorted in strcmp order:
   the table is searched with binaryFind(). */
char * keyword[keywordSum] = {"do", "else", "for", "if", "int", "read", "while", "write"};
/* Characters that always form a one-character token on their own. */
char singleword[50] = "+-*(){};,:";
/* Characters that may start a two-character operator (>=, <=, ==, !=, &&, ||).
   The '>', '<' and '&' entries had been lost from this string; they are
   required by the comparisons the scanner performs on the first character. */
char doubleword[10] = "><=!&|";
/* Input and output file names entered by the user (at most 299 characters). */
char Scanin[300], Scanout[300];
/* Source file being scanned and token file being written. */
FILE * fin, * fout;
/*
 * Binary search for the string c1 in the table c2[low..high].
 *
 * The table must be sorted in ascending strcmp() order.
 *
 * low, high  inclusive index range to search; an empty range (low > high)
 *            is allowed and simply yields "not found"
 * c1         string to look for
 * c2         sorted table of NUL-terminated strings
 *
 * Returns the index of the matching entry, or -1 when c1 is not present.
 */
int binaryFind(int low, int high, char * c1, char ** c2) {
    while (low <= high) {
        /* low + (high - low) / 2 cannot overflow, unlike (low + high) / 2 */
        int mid = low + (high - low) / 2;
        int cmp = strcmp(c1, c2[mid]);
        if (cmp == 0) return mid;
        if (cmp > 0) low = mid + 1;   /* c1 sorts after the middle entry */
        else high = mid - 1;          /* c1 sorts before the middle entry */
    }
    return -1;
}
/*
 * Helper for TESTscan: collects a run of characters into buf.
 *
 * ch is the first character of the run (already read); further characters
 * are taken from `in` for as long as accept() says they belong to the run.
 * At most cap - 1 characters are stored and buf is always NUL-terminated;
 * *tooLong is set to 1 when characters had to be dropped, 0 otherwise.
 *
 * Returns the first character after the run (possibly EOF).
 */
static int scanRun(FILE *in, int ch, int (*accept)(int), char *buf, int cap, int *tooLong) {
    int len = 0;
    *tooLong = 0;
    do {
        if (len < cap - 1) buf[len++] = (char)ch;
        else *tooLong = 1;
        ch = getc(in);
    } while (ch != EOF && accept(ch));
    buf[len] = '\0';
    return ch;
}

/*
 * Lexical scanner for the TEST language.
 *
 * Asks for the source file name and the output file name on stdin, then
 * writes one "<kind>\t<lexeme>" line per token to the output file:
 *   ID     identifiers          NUM    unsigned integer literals
 *   ERROR  characters that start no token, and lexemes longer than
 *          39 characters (reported with their first 39 characters)
 * Keywords, single-character symbols and operators use the lexeme itself
 * as their kind. Slash-star comments are skipped; an unterminated comment
 * silently runs to the end of the file.
 *
 * Returns 0 on success, 1 if the source file cannot be opened, 2 if the
 * output file cannot be created, 3 if any lexical error was reported.
 */
int TESTscan() {
    enum { TOKEN_CAP = 40 };   /* lexeme buffer size, including the NUL */
    char token[TOKEN_CAP];
    int ch;                    /* int, not char: it has to be able to hold EOF */
    int es = 0, n, tooLong;

    /* %299s keeps the names inside the 300-byte Scanin/Scanout buffers. */
    printf("请输入源文件名(包括路径):");
    if (scanf("%299s", Scanin) != 1) Scanin[0] = '\0';
    printf("请输入词法分析输出文件名(包括路径):");
    if (scanf("%299s", Scanout) != 1) Scanout[0] = '\0';

    if ((fin = fopen(Scanin, "r")) == NULL) {
        printf("\n打开词法分析输入文件出错!\n");
        return 1;
    }
    if ((fout = fopen(Scanout, "w")) == NULL) {
        printf("\n创建词法分析输出文件出错!\n");
        fclose(fin);           /* do not leak the already opened source file */
        return 2;
    }

    ch = getc(fin);
    while (ch != EOF) {
        while (ch == ' ' || ch == '\n' || ch == '\t') {
            ch = getc(fin);
        }
        /* Trailing white space must not turn EOF into an ERROR token. */
        if (ch == EOF) break;

        if (isalpha(ch)) {                         /* identifier or keyword */
            ch = scanRun(fin, ch, isalnum, token, TOKEN_CAP, &tooLong);
            n = binaryFind(0, keywordSum - 1, token, keyword);
            if (tooLong) {
                es = 3;
                fprintf(fout, "%s\t%s\n", "ERROR", token);
            } else if (n < 0) {
                fprintf(fout, "%s\t%s\n", "ID", token);
            } else {
                fprintf(fout, "%s\t%s\n", token, token);
            }
        } else if (isdigit(ch)) {                  /* number */
            ch = scanRun(fin, ch, isdigit, token, TOKEN_CAP, &tooLong);
            if (tooLong) {
                es = 3;
                fprintf(fout, "%s\t%s\n", "ERROR", token);
            } else {
                fprintf(fout, "%s\t%s\n", "NUM", token);
            }
        } else if (ch != '\0' && strchr(singleword, ch) != NULL) {
            /* ch != '\0': strchr would otherwise match the terminator */
            token[0] = (char)ch;
            token[1] = '\0';
            ch = getc(fin);
            fprintf(fout, "%s\t%s\n", token, token);
        } else if (ch != '\0' && strchr(doubleword, ch) != NULL) {
            /* One- or two-character operator; the set of possible first
               characters is whatever the doubleword table contains. */
            int first = ch;
            int paired;
            ch = getc(fin);
            if (ch == '=') {
                /* >=  <=  !=  == */
                paired = (first == '>' || first == '<' || first == '!' || first == '=');
            } else {
                /* &&  || */
                paired = ((ch == '&' || ch == '|') && ch == first);
            }
            token[0] = (char)first;
            if (paired) {
                token[1] = (char)ch;
                token[2] = '\0';
                ch = getc(fin);
            } else {
                token[1] = '\0';
            }
            fprintf(fout, "%s\t%s\n", token, token);
        } else if (ch == '/') {                    /* comment or division */
            ch = getc(fin);
            if (ch == '*') {
                /* Slide a two-character window until it holds the closing
                   star-slash pair or the file ends. */
                int prev;
                int cur = getc(fin);
                do {
                    prev = cur;
                    cur = getc(fin);
                } while ((prev != '*' || cur != '/') && cur != EOF);
                ch = getc(fin);
            } else {
                /* ch already holds the character after the '/' */
                token[0] = '/';
                token[1] = '\0';
                fprintf(fout, "%s\t%s\n", token, token);
            }
        } else {                                   /* illegal character */
            token[0] = (char)ch;
            token[1] = '\0';
            ch = getc(fin);
            es = 3;
            fprintf(fout, "%s\t%s\n", "ERROR", token);
        }
    }
    fclose(fin);
    fclose(fout);
    return es;
}
/*
 * Entry point: runs the scanner and reports whether it succeeded.
 * The process exit status is the TESTscan() result (0 on success).
 */
int main() {
    int es = TESTscan();
    if (es > 0) {
        printf("词法分析有错, 编译停止!\n");
    } else {
        printf("词法分析成功!\n");
    }
    return es;
}
这个我做了一个类似的,识别字符串的,发给你,你在这个基础上改一下吧,哥最近在忙软考不然帮你做做。
程序识别的字符串输入的格式:
1、输入如下正确的常量说明串:
const count=10,sum=81.5,char1='f',max=169,str1="h*54 2..4S!AAsj", char2='@',str2="aa!+h";
输出:
count(integer,10)
sum(float,81.5)
char1(char,'f')
max(integer,169)
str1(string,"h*54 2..4S!AAsj")
char2(char,'@')
str2(string,"aa!+h")
int_num=2; char_num=2; string_num=2; float_num=1
程序如下:
#include <iostream>
using namespace std;
// Number of constants of each kind that have been printed so far.
// Value-type codes used throughout: 0 = char, 1 = string, 2 = integer, 3 = float.
int char_num=0,string_num=0,int_num=0,float_num=0;
// One parsed constant declaration: its name, the text of its value and the
// kind of value it holds.
class zifu
{
public:
    int i;          // scratch index used by display(); kept as a public member for compatibility
    char name[20];  // constant name, NUL-terminated
    char data[20];  // value exactly as written in the declaration, NUL-terminated
    int dataname;   // value-type code: 0 = char, 1 = string, 2 = integer, 3 = float
    // Debug dump: prints "name=data" on one line and the type code on the next.
    void display(){
        i=0;
        while(name[i]!='\0')
        {
            std::cout<<name[i];
            i++;
        }
        std::cout<<"=";
        i=0;
        while(data[i]!='\0'){
            std::cout<<data[i];
            i++;    // without this increment the loop never terminated
        }
        std::cout<<std::endl;
        std::cout<<dataname<<std::endl;
    }
};
// Reads lines from standard input until one starts with "const " and stores
// that line (without the newline) in a.
//
//   a     destination buffer
//   size  capacity of a in bytes, including the terminating NUL; the default
//         of 100 matches the buffer the caller in this program passes
//
// Lines that do not fit into the buffer are rejected and discarded instead of
// overflowing it. When standard input is exhausted (or broken) the function
// stores an empty string and returns with std::cin left in a failed state, so
// the caller can detect it with `if (!std::cin)`.
void input(char a[], int size = 100)
{
    static const char stand[] = "const ";   // required prefix, including the blank
    if (a == 0 || size <= 0)
        return;
    for (;;)
    {
        if (!std::cin.getline(a, size))
        {
            if (std::cin.eof() || std::cin.bad())
            {
                a[0] = '\0';
                return;
            }
            // failbit alone: the line was longer than the buffer.
            std::cin.clear();
            char dropped;
            while (std::cin.get(dropped) && dropped != '\n')
            {
            }
            std::cout << "The input line is too long! ";
            std::cout << "Please input a string again!" << std::endl;
            continue;
        }
        // Compare against the prefix; a shorter line stops at its own NUL.
        int matched = 0;
        while (stand[matched] != '\0' && a[matched] == stand[matched])
            matched++;
        if (stand[matched] == '\0')
            return;
        std::cout << "It is not a constant declaration statement! ";
        std::cout << "Please input a string again!" << std::endl;
    }
}
// Parses the declarations of a "const name=value,name=value,...;" statement.
//
//   aclass    array that receives one entry per declaration
//   b         the statement text, NUL-terminated
//   integer   index of the separator that precedes the first declaration
//             (the blank after "const", i.e. 5, for a whole statement)
//   tag       in: index of the first free entry of aclass;
//             out: one past the last entry written. This is a reference so
//             that the caller learns how many entries were stored.
//   capacity  number of entries aclass can hold; the default of 10 matches
//             the array the caller in this program passes
//
// Each entry gets its name (blanks removed), the value text as written and a
// value-type code decided from how the value starts:
//   'x'  -> 0 (char)      "..." -> 1 (string; may contain ',' ';' and '.')
//   digits -> 2 (integer) digits with exactly one '.' -> 3 (float)
// anything else, or an unterminated quote, gets -1, which the display code
// reports as a type error. Names and values longer than the 19 characters an
// entry can hold are truncated. Parsing stops at ';', at the end of the text,
// when the array is full, or at a declaration without a name or '='.
void sort(zifu aclass[], char b[], int integer, int &tag, int capacity = 10)
{
    if (aclass == 0 || b == 0 || integer < 0)
        return;
    // Refuse a start index that lies beyond the end of the text.
    for (int k = 0; k <= integer; ++k)
        if (b[k] == '\0')
            return;

    const int nameCap = static_cast<int>(sizeof(aclass[0].name));
    const int dataCap = static_cast<int>(sizeof(aclass[0].data));
    int pos = integer;

    while (tag >= 0 && tag < capacity)
    {
        zifu &item = aclass[tag];

        // --- name: everything up to '=', blanks dropped ---
        int len = 0;
        for (++pos; b[pos] != '\0' && b[pos] != '='; ++pos)
        {
            if (b[pos] != ' ' && len < nameCap - 1)
                item.name[len++] = b[pos];
        }
        item.name[len] = '\0';
        if (b[pos] != '=' || len == 0)
            return;                     // malformed declaration: not counted

        // --- value ---
        ++pos;
        while (b[pos] == ' ')
            ++pos;
        len = 0;
        item.dataname = -1;
        const char opener = b[pos];
        if (opener == '\'' || opener == '"')
        {
            // Quoted value: copy through the matching closing quote so that
            // separators inside the quotes stay part of the value.
            item.data[len++] = opener;
            ++pos;
            while (b[pos] != '\0' && b[pos] != opener)
            {
                if (len < dataCap - 1)
                    item.data[len++] = b[pos];
                ++pos;
            }
            if (b[pos] == opener)
            {
                if (len < dataCap - 1)
                    item.data[len++] = opener;
                ++pos;
                item.dataname = (opener == '\'') ? 0 : 1;
            }
        }
        else
        {
            // Unquoted value: a number, optionally signed.
            bool sawDigit = false;
            bool sawOther = false;
            int dots = 0;
            for (; b[pos] != '\0' && b[pos] != ',' && b[pos] != ';'; ++pos)
            {
                const char c = b[pos];
                if (c == ' ')
                    continue;
                if (c >= '0' && c <= '9')
                    sawDigit = true;
                else if (c == '.')
                    ++dots;
                else if (!(len == 0 && (c == '+' || c == '-')))
                    sawOther = true;
                if (len < dataCap - 1)
                    item.data[len++] = c;
            }
            if (sawDigit && !sawOther && dots <= 1)
                item.dataname = (dots == 1) ? 3 : 2;
        }
        item.data[len] = '\0';

        // Skip anything left before the separator (e.g. blanks after a quote).
        while (b[pos] != '\0' && b[pos] != ',' && b[pos] != ';')
            ++pos;
        ++tag;
        if (b[pos] != ',')
            return;                     // ';' or end of text: statement finished
    }
}
// Prints one constant as "name(type,value)" followed by a newline and adds it
// to the matching global counter (char_num, string_num, int_num, float_num).
// An unknown type code is printed as "type error" and not counted.
void display(zifu a)
{
    std::cout << a.name << "(";
    switch (a.dataname)
    {
    case 0:
        std::cout << "char";
        char_num++;
        break;
    case 1:
        std::cout << "string";
        string_num++;
        break;
    case 2:
        std::cout << "integer";
        int_num++;
        break;
    case 3:
        std::cout << "float";
        float_num++;
        break;
    default:
        std::cout << "type error";
    }
    std::cout << ',' << a.data << ")" << std::endl;
}
void main()
{
zifu teger[10];
char c[100];
int tag0=0,integer0=5;
int m=0;
while(1){
cout"please input:"endl;
input(c);
// cout"0=字符,1=字符串,2=整数,3=浮点数"endl;
sort(teger,c,integer0,tag0);
// cout"字符串数量="tag0endl;
for(m=0;mtag0;m++)
{display(teger[m]);}
coutendl"int_num="int_num"; ""char_num="char_num"; ";
cout"string_num="string_num"; ""float_num="float_numendlendl;
}
}
首先看下我们要分析的代码段如下:
输出结果如下:
输出结果(a).PNG
输出结果(b).PNG
输出结果(c).PNG
括号里是一个二元式:(单词类别编码,单词位置编号)
代码如下:
?
1234567891011121314
package Yue.LexicalAnalyzer;

import java.io.*;

/**
 * Program entry point: runs the lexer over its input, printing the token
 * sequence first and the symbol table afterwards.
 */
public class Main {
    public static void main(String[] args) throws IOException {
        final Lexer analyzer = new Lexer();
        analyzer.printToken();
        analyzer.printSymbolsTable();
    }
}
?
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
package Yue.LexicalAnalyzer;

import java.io.*;
import java.util.*;

/**
 * Lexical analyzer: reads the source file character by character, produces
 * tokens, keeps a symbol table of the identifiers it has seen and writes the
 * results to text files.
 */
public class Lexer {
    /** Current line number, starting at 1; incremented at every line end. */
    public static int line = 1;
    /** Most recently read character; '\uffff' once the input is exhausted. */
    char character = ' ';
    /** Reserved words, keyed by their lexeme. */
    Hashtable<String, KeyWord> keywords = new Hashtable<String, KeyWord>();
    /** Every token produced so far, in order (error tokens included). */
    private ArrayList<Token> tokens = new ArrayList<Token>();
    /** Symbol table: one entry per distinct identifier. */
    private ArrayList<Symbol> symtable = new ArrayList<Symbol>();
    /** Source reader; stays null when the input file could not be opened. */
    BufferedReader reader = null;
    /** True once the end of the input has been reached. */
    private Boolean isEnd = false;
    /** True once the error file has been created and its header written. */
    private boolean errorFileStarted = false;

    /** Returns true once the end of the input has been reached. */
    public Boolean getReaderState() {
        return this.isEnd;
    }

    /** Formats one token as the line used in the token and error listings. */
    private static String describe(Token tok) {
        return "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
                + tok.name + ": " + tok.toString() + "\r\n";
    }

    /**
     * Scans the whole input and writes the token sequence to E:\lex.txt and to
     * standard output.
     */
    public void printToken() throws IOException {
        FileWriter writer = new FileWriter("E:\\lex.txt");
        try {
            System.out.println("词法分析结果如下:");
            System.out.print("杜悦-2015220201031\r\n\n");
            writer.write("杜悦-2015220201031\r\n\r\n");
            while (!getReaderState()) {
                Token tok = scan();
                if (tok == null) {
                    break; // only white space was left before the end of input
                }
                String str = describe(tok);
                writer.write(str);
                System.out.print(str);
            }
            writer.flush();
        } finally {
            writer.close(); // the writer used to be left open
        }
    }

    /** Writes the symbol table to E:\symtab1.txt and to standard output. */
    public void printSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("E:\\symtab1.txt");
        try {
            System.out.print("\r\n\r\n符号表\r\n");
            System.out.print("编号\t行号\t名称\r\n");
            writer.write("符号表\r\n");
            writer.write("编号 " + "\t行号 " + "\t名称 \r\n");
            for (Symbol symbol : symtable) {
                String desc = symbol.pos + "\t" + symbol.line + "\t" + symbol.toString();
                System.out.print(desc + "\r\n");
                writer.write(desc + "\r\n");
            }
            writer.flush();
        } finally {
            writer.close();
        }
    }

    /**
     * Records one erroneous token in E:\error.txt.
     *
     * The first call creates the file and writes its header; later calls
     * append, so earlier errors are no longer overwritten. The file is closed
     * after every call so the text actually reaches the disk.
     */
    public void printError(Token tok) throws IOException {
        FileWriter writer = new FileWriter("E:\\error.txt", errorFileStarted);
        try {
            System.out.print("\r\n\r\n错误词法如下:\r\n");
            if (!errorFileStarted) {
                writer.write("错误词法如下:\r\n");
                errorFileStarted = true;
            }
            writer.write(describe(tok));
            writer.flush();
        } finally {
            writer.close();
        }
    }

    /** Registers a reserved word. */
    void reserve(KeyWord w) {
        keywords.put(w.lexme, w);
    }

    /** Opens the input file E:\输入.txt and registers the reserved words. */
    public Lexer() {
        try {
            reader = new BufferedReader(new FileReader("E:\\输入.txt"));
        } catch (IOException e) {
            // reader stays null; readch() then reports end of input at once
            System.out.print(e);
        }
        this.reserve(KeyWord.begin);
        this.reserve(KeyWord.end);
        this.reserve(KeyWord.integer);
        this.reserve(KeyWord.function);
        this.reserve(KeyWord.read);
        this.reserve(KeyWord.write);
        this.reserve(KeyWord.aIf);
        this.reserve(KeyWord.aThen);
        this.reserve(KeyWord.aElse);
    }

    /**
     * Reads the next character into {@link #character}. At the end of the
     * input (or when no input file is open) the character becomes '\uffff',
     * the end flag is set and the reader is closed.
     */
    public void readch() throws IOException {
        if (isEnd || reader == null) {
            character = '\uffff';
            isEnd = true;
            return;
        }
        int next = reader.read();
        if (next == -1) {
            character = '\uffff';
            isEnd = true;
            reader.close();
        } else {
            character = (char) next;
        }
    }

    /**
     * Reads the next character and tells whether it equals ch. On a match the
     * current character is reset to a blank; otherwise it keeps the character
     * that was read.
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.character != ch) {
            return false;
        }
        this.character = ' ';
        return true;
    }

    /**
     * Recognizes an unsigned integer starting at the current character.
     * Returns true and appends a Num token when one was found.
     */
    public Boolean isDigit() throws IOException {
        if (!Character.isDigit(character)) {
            return false;
        }
        int value = 0;
        while (Character.isDigit(character)) {
            value = 10 * value + Character.digit(character, 10);
            readch();
        }
        Num n = new Num(value);
        n.line = line;
        tokens.add(n);
        return true;
    }

    /**
     * Recognizes a reserved word or an identifier starting at the current
     * character. Identifiers are entered into the symbol table once; repeated
     * occurrences reuse the position of the first one.
     */
    public Boolean isLetter() throws IOException {
        if (!Character.isLetter(character)) {
            return false;
        }
        StringBuffer sb = new StringBuffer();
        while (Character.isLetterOrDigit(character)) {
            sb.append(character);
            readch();
        }
        String s = sb.toString();
        KeyWord w = keywords.get(s);
        if (w != null) {
            w.line = line;
            tokens.add(w);
            return true;
        }
        Symbol sy = new Symbol(s);
        sy.line = line;
        Symbol known = null; // earlier symbol with the same text, if any
        for (Symbol candidate : symtable) {
            if (sy.toString().equals(candidate.toString())) {
                known = candidate;
            }
        }
        if (known == null) {
            sy.pos = symtable.size() + 1;
            symtable.add(sy);
        } else {
            sy.pos = known.pos;
        }
        tokens.add(sy);
        return true;
    }

    /** Consumes the current character and appends tok as the next token. */
    private Boolean accept(Token tok) throws IOException {
        readch();
        tok.line = line;
        tokens.add(tok);
        return true;
    }

    /**
     * Handles an operator whose second character must be '='. When the '=' is
     * missing, the first character is reported as an error token and the
     * character after it is left for the next scan.
     */
    private Boolean acceptWithEquals(Token tok) throws IOException {
        char first = character;
        readch();
        if (!isEnd && character == '=') {
            return accept(tok);
        }
        reject(first);
        return true;
    }

    /** Appends the line-end token and moves on to the next line. */
    private Boolean endLine() {
        LineEnd.lineEnd.line = line;
        tokens.add(LineEnd.lineEnd);
        line++;
        return true;
    }

    /** Creates, stores and reports an error token for the character bad. */
    private Token reject(char bad) throws IOException {
        Token tok = new Token(bad);
        tok.line = line;
        tokens.add(tok);
        printError(tok);
        return tok;
    }

    /**
     * Recognizes end markers, line ends, delimiters and operators starting at
     * the current character.
     *
     * Returns true when a token was appended to the token list. That token is
     * an error token when ':', '>', '<' or '!' is not followed by '='.
     * "\r\n", a lone "\r" and a lone "\n" all count as one line end.
     */
    public Boolean isSign() throws IOException {
        switch (character) {
            case '#':
                return accept(AllEnd.allEnd);
            case '\r':
                readch();
                if (!isEnd && character == '\n') {
                    readch();
                }
                return endLine();
            case '\n':
                readch();
                return endLine();
            case '(':
                return accept(Delimiter.lpar);
            case ')':
                return accept(Delimiter.rpar);
            case ';':
                return accept(Delimiter.sem);
            case '+':
                return accept(CalcWord.add);
            case '-':
                return accept(CalcWord.sub);
            case '*':
                return accept(CalcWord.mul);
            case '/':
                return accept(CalcWord.div);
            case ':':
                return acceptWithEquals(CalcWord.assign);
            case '>':
                return acceptWithEquals(CalcWord.ge);
            case '<':
                return acceptWithEquals(CalcWord.le);
            case '!':
                return acceptWithEquals(CalcWord.ne);
            default:
                return false;
        }
    }

    /**
     * Returns the next token, skipping blanks and tabs first.
     *
     * A character that starts no token is consumed and returned as an error
     * token (and recorded through printError). Returns null when the end of
     * the input is reached before any further token starts.
     */
    public Token scan() throws IOException {
        while (!isEnd && (character == ' ' || character == '\t')) {
            readch();
        }
        if (isEnd) {
            return null;
        }
        if (isDigit() || isSign() || isLetter()) {
            return tokens.get(tokens.size() - 1);
        }
        Token tok = reject(character);
        readch(); // step past the bad character so scanning cannot loop on it
        return tok;
    }
}