词法分析器实现[c++]

    xiaoxiao2024-12-19  15

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <fstream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    using namespace std;

    // A small line-oriented lexical analyzer for C-like source text.
    //
    // Conventions:
    //   - every source line ends with '\n';
    //   - identifiers are expected to be at most 64 characters long (this is a
    //     convention only; nothing below enforces it);
    //   - only "//" comments are supported.
    //
    // Known limitations: string/character literals are not scanned as a unit, so
    // a literal that contains a blank, a delimiter or "//" is split (or cut) at
    // that point.

    // Reserved words.
    const int KEYS_COUNT = 32;
    static char static_key_words[32][20] = {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
    };

    // Constant convention: constants appear on the right-hand side of assignments.
    // Operators consist of at most two characters.
    // Characters that may start a multi-character operator
    // (not referenced by the code in this file).
    static char both_operator_com[10][2] = {
        ">", "<", "=", "-", "+", "!", "&", "|", "/", "*"
    };

    // Comment detection character (not referenced by the code in this file).
    static char annotation_char[2] = "\\";

    // Opening characters of string and character constants.
    const int CONST_CHARS_COUNT = 2;
    static char const_chars[2][2] = { "\"", "'" };

    // Return codes shared by all predicates below.
    // NOTE: SUCC is 0 and FAIL is 1, so results must be compared explicitly
    // against these constants and never used directly as booleans.
    const int SUCC = 0;
    const int FAIL = 1;
    const int ERROR = -1;
    const int FILE_NOT_EXIT = 10;

    // Single-character delimiters. '/' is listed here as well; division_str looks
    // for the "//" comment marker before it treats '/' as a delimiter.
    // '^' and '~' are included so that every entry of single_operator is split
    // off as its own token.
    const int DIVISION_CHARS_COUNT = 23;
    static char division_chars[23][2] = {
        " ", ">", "<", "=", "-", "+", "!", "&", "|", "%", "*", ";",
        "(", ",", "/", ")", "{", "}", "[", "]", ".", "^", "~"
    };

    // Two-character delimiters.
    // NOTE(review): "*=" appears twice; the second entry is redundant and was
    // possibly meant to be another operator such as "%=" - confirm.
    const int DIVISION_MULTI_CHARS_COUNT = 13;
    static char division_multi_chars[13][3] = {
        ">>", "<<", "<=", ">=", "+=", "-=", "*=", "/=", "!=", "&&", "*=", "||", "=="
    };

    // Characters that can be the SECOND character of a two-character delimiter.
    const int DIVISION_MULTI_CHAR_COUNT = 5;
    static char division_multi_char[5][2] = { ">", "<", "=", "&", "|" };

    // Single-character operators.
    const int SINGLE_OP_COUNT = 14;
    static char single_operator[14][2] = {
        "+", "-", "*", "/", "<", ">", "=", "^", ",", "&", "|", "%", "~", "!"
    };

    // Two-character operators.
    // NOTE(review): "*=" appears twice here as well - confirm the intended entry.
    const int MULTI_OP_COUNT = 13;
    static char multi_operator[13][3] = {
        ">>", "<<", "<=", ">=", "+=", "-=", "*=", "/=", "!=", "&&", "==", "*=", "||"
    };

    // Boundary (punctuation) characters.
    const int LIMIT_COUNT = 8;
    static char limit_[8][2] = { "(", ")", "{", "}", ".", "[", "]", ";" };

    // Current scan state, meant for assignment statements
    // (not referenced by the code in this file).
    int status = 0;

    // Token category labels.
    static char TAG[4] = "tag";      // identifier
    static char KEY[4] = "key";      // reserved word
    static char CONST[6] = "const";  // constant
    static char OP[3] = "op";        // operator
    static char LIMIT[6] = "limit";  // boundary character
    static char ERR[6] = "error";    // unrecognized / malformed word

    vector< pair<char*, char*> > tokens;  // (category label, word) pairs
    vector<char*> anno;                   // collected comment texts

    // Fills the first `length` bytes of `chrs` with `c`.
    void memset_(char* chrs, char c, int length) {
        for (int i = 0; i < length; i++) {
            chrs[i] = c;
        }
    }

    // Returns a new[]-allocated, NUL-terminated copy of the first `count` bytes
    // of `src`. The caller owns the result and releases it with delete[].
    static char* dup_chars(const char* src, size_t count) {
        char* copy = new char[count + 1];
        memcpy(copy, src, count);
        copy[count] = '\0';
        return copy;
    }

    // Appends a heap copy of `word` to `words` (if `word` is non-empty) and
    // resets `word` so the next token starts from scratch.
    static void flush_word(string& word, vector<char*>& words) {
        if (!word.empty()) {
            words.push_back(dup_chars(word.c_str(), word.length()));
            word.clear();
        }
    }

    // delete[]s every string in `items` and empties the vector.
    static void release_strings(vector<char*>& items) {
        for (size_t k = 0; k < items.size(); k++) {
            delete[] items[k];
        }
        items.clear();
    }

    // Returns SUCC when `c` is NOT a single-character delimiter, FAIL when it is.
    int is_not_division_char(char c) {
        for (int k = 0; k < DIVISION_CHARS_COUNT; k++) {
            if (c == division_chars[k][0]) {
                return FAIL;
            }
        }
        return SUCC;
    }

    // Returns SUCC when `c` can NOT be the second character of a two-character
    // delimiter, FAIL when it can.
    int is_not_multi_division_char(char c) {
        for (int k = 0; k < DIVISION_MULTI_CHAR_COUNT; k++) {
            if (c == division_multi_char[k][0]) {
                return FAIL;
            }
        }
        return SUCC;
    }

    // Returns SUCC when `chrs` is exactly one of the two-character delimiters,
    // FAIL otherwise. `chrs` must be NUL-terminated.
    int is_multi_division_chars(char* chrs) {
        int len = strlen(chrs);
        if (len != 2) {
            return FAIL;
        }
        for (int i = 0; i < DIVISION_MULTI_CHARS_COUNT; i++) {
            if (!strcmp(chrs, division_multi_chars[i])) {
                return SUCC;
            }
        }
        return FAIL;
    }

    // Returns SUCC when `c` is an ASCII decimal digit, FAIL otherwise.
    int is_dig(char c) {
        if (c > '9' || c < '0') {
            return FAIL;
        }
        return SUCC;
    }

    // Returns SUCC when `c` opens a string or character constant (" or '),
    // FAIL otherwise.
    int is_const_char(char c) {
        for (int k = 0; k < CONST_CHARS_COUNT; k++) {
            if (c == const_chars[k][0]) {
                return SUCC;
            }
        }
        return FAIL;
    }

    /** Splits one source line into words.
     *
     * Blanks, tabs and carriage returns end the current word and are dropped.
     * Every delimiter becomes a word of its own; a delimiter followed by a
     * character that completes an entry of division_multi_chars becomes one
     * two-character word. A "//" marker ends the line: the rest of the line is
     * appended to the global `anno` list and produces no words. The stored
     * comment text begins at the second '/' of the marker, so it always starts
     * with '/' as is_anno expects.
     *
     * Each emitted string is allocated with new[]; ownership passes to the
     * vector it was pushed into.
     *
     * @param line  NUL-terminated line to split
     * @param words receives the words, in source order
     * @param line_ line number (currently unused)
     */
    void division_str(char* line, vector<char*> &words, int line_) {
        (void)line_;
        int len = strlen(line);
        if (len < 1) {
            return;
        }

        string word;  // characters of the word being collected; grows as needed
        for (int i = 0; i < len; i++) {
            char c = line[i];

            // Whitespace separates words but is never emitted.
            if (c == ' ' || c == '\t' || c == '\r') {
                flush_word(word, words);
                continue;
            }

            // line[i + 1] is at worst the terminating NUL, so the look-ahead is
            // always a valid read.
            if (c == '/' && line[i + 1] == '/') {
                // Emit the word in front of the comment instead of merging it
                // into the comment text.
                flush_word(word, words);
                anno.push_back(dup_chars(line + i + 1, len - i - 1));
                break;
            }

            if (is_not_division_char(c) == SUCC) {
                word += c;
                continue;
            }

            // `c` is a delimiter: close the pending word, then emit the delimiter.
            flush_word(word, words);
            char two[3] = { c, line[i + 1], '\0' };
            if (is_not_multi_division_char(line[i + 1]) == FAIL
                    && is_multi_division_chars(two) == SUCC) {
                words.push_back(dup_chars(two, 2));
                i++;  // the second character has been consumed as well
            } else {
                // Not a known pair (e.g. "->"): keep the first character as a
                // single delimiter; the next one is handled on the next pass.
                words.push_back(dup_chars(&c, 1));
            }
        }
        flush_word(word, words);
    }

    // Returns non-zero when `c` is an ASCII letter.
    static int is_ascii_letter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    // Returns SUCC when `chrs` is an identifier: a letter or '_' followed by any
    // number of letters, digits or '_'. Returns FAIL otherwise (including for the
    // empty string).
    int is_tag(char* chrs) {
        int len = strlen(chrs);
        if (len < 1) {
            return FAIL;
        }
        // First character: letter or underscore only.
        if (chrs[0] != '_' && !is_ascii_letter(chrs[0])) {
            return FAIL;
        }
        // Remaining characters: letters, digits or underscore.
        for (int i = 1; i < len; i++) {
            if (chrs[i] != '_' && !is_ascii_letter(chrs[i]) && is_dig(chrs[i]) != SUCC) {
                return FAIL;
            }
        }
        return SUCC;
    }

    // Classifies `chrs` as a constant.
    //   SUCC  - a quoted constant whose first unescaped closing quote is its last
    //           character, or a word made only of decimal digits
    //   ERROR - looks like a constant but is malformed: a quote closes before the
    //           end (or never closes), or a digit is followed by a non-digit
    //   FAIL  - not a constant at all
    int _is_const(char* chrs) {
        int len = strlen(chrs);
        if (len < 1) {
            return FAIL;
        }
        if (is_const_char(chrs[0]) == SUCC && chrs[len - 1] == chrs[0]) {
            // Look for the first unescaped occurrence of the opening quote.
            int i = 1;
            while (i < len) {
                if (chrs[i] == '\\') {
                    i += 2;  // a backslash escapes the character after it
                    continue;
                }
                if (chrs[i] == chrs[0]) {
                    break;
                }
                i++;
            }
            // Well-formed only when that quote is the very last character; this
            // also rejects a lone quote and a final quote that is escaped.
            return (i == len - 1) ? SUCC : ERROR;
        } else if (is_dig(chrs[0]) == SUCC) {
            for (int i = 1; i < len; i++) {
                if (is_dig(chrs[i]) != SUCC) {
                    return ERROR;
                }
            }
            return SUCC;
        }
        return FAIL;
    }

    // Returns SUCC when `c` is a single-character operator, FAIL otherwise.
    int is_op(char c) {
        for (int k = 0; k < SINGLE_OP_COUNT; k++) {
            if (c == single_operator[k][0]) {
                return SUCC;
            }
        }
        return FAIL;
    }

    // Returns SUCC when `chrs` is exactly one of the two-character operators,
    // FAIL otherwise.
    int is_multi_op(char* chrs) {
        int len = strlen(chrs);
        if (len != 2) {
            return FAIL;
        }
        for (int i = 0; i < MULTI_OP_COUNT; i++) {
            if (!strcmp(chrs, multi_operator[i])) {
                return SUCC;
            }
        }
        return FAIL;
    }

    // Returns SUCC when `chrs` is a reserved word, FAIL otherwise (including for
    // the empty string).
    int is_keys(char* chrs) {
        if (chrs[0] == '\0') {
            return FAIL;
        }
        for (int i = 0; i < KEYS_COUNT; i++) {
            if (strcmp(chrs, static_key_words[i]) == 0) {
                return SUCC;
            }
        }
        return FAIL;
    }

    // Returns SUCC when `c` is a boundary character, FAIL otherwise.
    int is_limit(char c) {
        for (int i = 0; i < LIMIT_COUNT; i++) {
            if (c == limit_[i][0]) {
                return SUCC;
            }
        }
        return FAIL;
    }

    // Returns SUCC when `chrs` is non-empty and starts with '/', FAIL otherwise.
    // Intended for "//" comments, which
    //   1. start at the beginning of a word or of a line, and
    //   2. extend over all following characters.
    int is_anno(char* chrs) {
        int len = strlen(chrs);
        if (len < 1) {
            return FAIL;
        }
        if (chrs[0] != '/') {
            return FAIL;
        }
        return SUCC;
    }

    // Reads `file_name` line by line and appends a new[]-allocated copy of each
    // line (without the trailing '\n') to `output`.
    // Returns FILE_NOT_EXIT when the file cannot be opened, SUCC otherwise.
    int read_sour(char* file_name, vector<char*> &output) {
        ifstream fin(file_name);
        if (!fin) {
            return FILE_NOT_EXIT;
        }
        string line;
        // Looping on getline's result stops on end-of-file AND on read errors;
        // a loop on !eof() never terminates when a read fails without hitting EOF.
        while (getline(fin, line)) {
            output.push_back(dup_chars(line.c_str(), line.length()));
        }
        return SUCC;
    }

    // Splits every line of `input` into words appended to `output`.
    // Returns FAIL when `input` is empty, SUCC otherwise.
    int collect_words(vector<char*> &input, vector<char*> &output) {
        int size_ = input.size();
        if (size_ < 1) {
            return FAIL;
        }
        for (int i = 0; i < size_; i++) {
            division_str(input[i], output, i);
        }
        return SUCC;
    }

    // Returns the category label (LIMIT, KEY, OP, CONST, TAG or ERR) for `word`.
    static char* classify_word(char* word) {
        // The single-character tables only apply to one-character words.
        int single = (word[0] != '\0' && word[1] == '\0');

        if (single && is_limit(word[0]) == SUCC) {
            return LIMIT;
        }
        if (is_keys(word) == SUCC) {
            return KEY;
        }
        if (is_multi_op(word) == SUCC) {
            return OP;
        }
        if (single && is_op(word[0]) == SUCC) {
            return OP;
        }
        int ret = _is_const(word);
        if (ret == SUCC) {
            return CONST;
        }
        if (ret == ERROR) {
            return ERR;
        }
        if (is_tag(word) == SUCC) {
            return TAG;
        }
        return ERR;
    }

    // Lexical analysis: classifies every word, prints it as "<category, word>"
    // and appends the (category, word) pair to `result`. The pairs point at the
    // strings held by `words`; they are not copied.
    void word_analize(vector<char*> words, vector< pair<char*, char*> > &result) {
        int size_ = words.size();
        for (int i = 0; i < size_; i++) {
            char* kind = classify_word(words[i]);
            printf("<%s, %s>\n", kind, words[i]);
            result.push_back(pair<char*, char*>(kind, words[i]));
        }
    }

    // Debug helper: prints every entry of `words` together with its index.
    void scan_words(vector<char*> words) {
        int size_ = words.size();
        for (int i = 0; i < size_; i++) {
            printf("line: %d, content: %s\n", i, words[i]);
        }
    }

    // Entry point: tokenizes the file "sour.code" and prints the tokens.
    // Returns -1 when the file cannot be opened, 0 otherwise.
    int main() {
        static char FILE_NAME[32] = {"sour.code"};
        vector<char*> lines;
        vector<char*> words;
        int ret = read_sour(FILE_NAME, lines);
        if (ret == FILE_NOT_EXIT) {
            printf("file[%s] not exits\n", FILE_NAME);
            return -1;
        }
        collect_words(lines, words);
        word_analize(words, tokens);

        // `tokens` only borrows pointers into `words`, so drop it first and then
        // release every buffer that was allocated while reading and splitting.
        tokens.clear();
        release_strings(words);
        release_strings(anno);
        release_strings(lines);
        return 0;
    }

    少量分析在代码中~~ 写c++太累了...

    最新回复(0)