#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <vector>
#include <map>
#include <string.h>
using namespace std;
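// Convention: every line of the analysed program ends with '\n'.
// Convention: the maximum identifier length is 64.
// Comments: only the "//" form is supported for now.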
// Reserved words recognised by is_keys(): KEYS_COUNT entries, each stored in
// a fixed 20-byte slot.
const int KEYS_COUNT = 32;
static char static_key_words[KEYS_COUNT][20] = {
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while"
};
// Convention: constants appear on the right-hand side of assignment statements.
// Operators consist of at most two characters.
// Characters that may start a multi-character operator.
// NOTE(review): this table is not referenced elsewhere in the visible code.
static char both_operator_com[10][2] = {
    ">", "<",
    "=", "-",
    "+", "!",
    "&", "|",
    "/", "*"
};
// Character associated with comment detection.
// NOTE(review): the stored character is a backslash, while comments in this
// lexer start with '/'; the variable is not referenced in the visible code.
static char annotation_char[2] = { '\\', '\0' };
// Opening characters of string and character constants (double quote and
// single quote); consulted by is_const_char().
const int CONST_CHARS_COUNT = 2;
static char const_chars[CONST_CHARS_COUNT][2] = {
    "\"",
    "'"
};
// Return codes shared by the functions in this file.
const int SUCC = 0; // the check passed / the operation succeeded
const int FAIL = 1; // the check did not pass
const int ERROR = -1; // returned by _is_const() for a malformed constant
const int FILE_NOT_EXIT = 10; // returned by read_sour() when the file cannot be opened
// Single-character delimiters consulted by is_not_division_char().
// '/' is part of this table; division_str() treats it separately so that it
// can tell a "//" comment from a division sign.
const int DIVISION_CHARS_COUNT = 21;
static char division_chars[DIVISION_CHARS_COUNT][2] = {
    " ", ">", "<",
    "=", "-", "+",
    "!", "&", "|",
    "%", "*", ";",
    "(", ",", "/",
    ")", "{", "}",
    "[", "]", "."
};
// Two-character delimiters consulted by is_multi_division_chars().
// NOTE(review): "*=" is listed twice; one of the two entries was presumably
// meant to be a different operator - confirm before changing.
const int DIVISION_MULTI_CHARS_COUNT = 13;
static char division_multi_chars[DIVISION_MULTI_CHARS_COUNT][3] = {
    ">>", "<<", "<=", ">=",
    "+=", "-=", "*=", "/=",
    "!=", "&&", "*=", "||",
    "=="
};
// Characters checked by is_not_multi_division_char(); division_str() applies
// that check to the character following a delimiter before trying a
// two-character delimiter.
const int DIVISION_MULTI_CHAR_COUNT = 5;
static char division_multi_char[DIVISION_MULTI_CHAR_COUNT][2] = {
    ">",
    "<",
    "=",
    "&",
    "|"
};
// Single-character operators consulted by is_op().
const int SINGLE_OP_COUNT = 14;
static char single_operator[SINGLE_OP_COUNT][2] = {
    "+", "-", "*", "/",
    "<", ">", "=", "^",
    ",", "&", "|", "%",
    "~", "!"
};
// Two-character operators consulted by is_multi_op().
// NOTE(review): "*=" is listed twice; one of the two entries was presumably
// meant to be a different operator - confirm before changing.
const int MULTI_OP_COUNT = 13;
static char multi_operator[MULTI_OP_COUNT][3] = {
    ">>", "<<", "<=", ">=",
    "+=", "-=", "*=", "/=",
    "!=", "&&", "==", "*=",
    "||"
};
// Boundary (punctuation) characters consulted by is_limit().
const int LIMIT_COUNT = 8;
static char limit_[LIMIT_COUNT][2] = {
    "(", ")",
    "{", "}",
    ".", "[",
    "]", ";"
};
// Current scan state; per the original note it is meant to be meaningful for
// assignment statements. NOTE(review): not referenced elsewhere in the visible code.
int status = 0;
// Category labels stored as the first element of each token pair by word_analize().
static char TAG[4] = "tag"; // identifier
static char KEY[4] = "key"; // reserved word
static char CONST[6] = "const"; // constant
static char OP[3] = "op"; // operator
static char LIMIT[6] = "limit"; // boundary character
static char ERR[6] = "error"; // error
vector< pair<char*, char*> > tokens; // (category label, word) pairs filled by word_analize() from main()
vector<char*> anno; // comment texts collected by division_str()
// Memory initialisation: stores `c` into the first `length` bytes of `chrs`.
// Nothing is written when `length` is zero or negative.
void memset_(char* chrs, char c, int length)
{
    char* cursor = chrs;
    int remaining = length;
    while(remaining > 0) {
        *cursor++ = c;
        --remaining;
    }
}
// Tests whether `c` is NOT a single-character delimiter.
// Returns SUCC when `c` matches no entry of division_chars, FAIL when it does.
int is_not_division_char(char c)
{
    int idx = 0;
    while(idx < DIVISION_CHARS_COUNT && division_chars[idx][0] != c) {
        ++idx;
    }
    return (idx == DIVISION_CHARS_COUNT) ? SUCC : FAIL;
}
// Tests whether `c` is NOT one of the characters in division_multi_char.
// Returns SUCC when `c` is absent from the table, FAIL when it is present.
int is_not_multi_division_char(char c)
{
    int verdict = SUCC;
    for(int pos = 0; pos < DIVISION_MULTI_CHAR_COUNT && verdict == SUCC; ++pos) {
        if(division_multi_char[pos][0] == c) {
            verdict = FAIL;
        }
    }
    return verdict;
}
// Tests whether `chrs` is one of the two-character delimiters.
// Returns SUCC when `chrs` is exactly two characters long and equals an entry
// of division_multi_chars; FAIL otherwise.
int is_multi_division_chars(char* chrs)
{
    if(strlen(chrs) != 2) {
        return FAIL;
    }
    int remaining = DIVISION_MULTI_CHARS_COUNT;
    while(remaining-- > 0) {
        if(strcmp(chrs, division_multi_chars[remaining]) == 0) {
            return SUCC;
        }
    }
    return FAIL;
}
// Tests whether `c` is a decimal digit ('0'..'9').
// Returns SUCC for a digit, FAIL otherwise.
int is_dig(char c)
{
    const bool in_range = (c >= '0' && c <= '9');
    return in_range ? SUCC : FAIL;
}
// Tests whether `c` can open a string or character constant, i.e. whether it
// equals the first character of an entry of const_chars.
// Returns SUCC on a match, FAIL otherwise.
int is_const_char(char c)
{
    int pos = 0;
    while(pos < CONST_CHARS_COUNT) {
        if(const_chars[pos][0] == c) {
            return SUCC;
        }
        ++pos;
    }
    return FAIL;
}
// Copies line[begin, end) into a freshly allocated, NUL-terminated buffer.
static char* division_copy_range_(const char* line, int begin, int end)
{
    const int count = end - begin;
    char* out = new char[count + 1];
    memcpy(out, line + begin, count);
    out[count] = '\0';
    return out;
}

// Appends the pending word line[begin, end) to `words`. Does nothing when no
// word is pending (begin < 0) or when the word is longer than 1024 characters.
static void division_flush_word_(const char* line, int begin, int end, vector<char*> &words)
{
    const int MAX_WORD_LEN = 1024;
    if(begin < 0 || end <= begin) {
        return;
    }
    if(end - begin > MAX_WORD_LEN) {
        return; // over-long words are skipped
    }
    words.push_back(division_copy_range_(line, begin, end));
}

/** Splits one source line into words.
 *
 * Words are maximal runs of characters that are neither whitespace (space,
 * tab, carriage return) nor delimiters according to is_not_division_char().
 * Each delimiter is emitted as its own word; when a delimiter and the next
 * character form a pair accepted by is_multi_division_chars(), the pair is
 * emitted as a single word. A "//" ends the scan: the text from the second
 * '/' to the end of the line is appended to the global `anno` list.
 *
 * Every emitted word is allocated with new[]; this function never frees it.
 *
 * @param line  NUL-terminated line to split; a null pointer is ignored
 * @param words receives the words, in order of appearance
 * @param line_ line number; accepted for interface compatibility, not used
 */
void division_str(char* line, vector<char*> &words, int line_)
{
    (void)line_;
    if(line == NULL) {
        return;
    }
    const int len = static_cast<int>(strlen(line));
    int word_start = -1; // index of the first character of the pending word, -1 if none

    for(int i = 0; i < len; i++) {
        const char c = line[i];
        const bool blank = (c == ' ' || c == '\t' || c == '\r');

        if(!blank && is_not_division_char(c) == SUCC) {
            // Ordinary character: start or extend the pending word.
            if(word_start < 0) {
                word_start = i;
            }
            continue;
        }

        // Whitespace or delimiter: the pending word ends here.
        division_flush_word_(line, word_start, i, words);
        word_start = -1;
        if(blank) {
            continue;
        }

        // line[i + 1] is always readable: at worst it is the terminating NUL.
        if(c == '/' && line[i + 1] == '/') {
            // The stored comment text starts at the second '/'.
            anno.push_back(division_copy_range_(line, i + 1, len));
            return;
        }

        char pair_[3] = { c, line[i + 1], '\0' };
        if(is_multi_division_chars(pair_) == SUCC) {
            words.push_back(division_copy_range_(line, i, i + 2));
            i++; // the second character of the pair has been consumed
        } else {
            words.push_back(division_copy_range_(line, i, i + 1));
        }
    }

    // A word that runs to the end of the line.
    division_flush_word_(line, word_start, len, words);
}
// Tests whether `chrs` is an identifier: a letter or '_' followed by any
// number of letters, digits or '_'.
// Returns SUCC for an identifier; FAIL for a null pointer, an empty string or
// any string containing another character.
int is_tag(char* chrs)
{
    if(chrs == NULL || chrs[0] == '\0') {
        return FAIL;
    }
    for(int i = 0; chrs[i] != '\0'; i++) {
        const char c = chrs[i];
        const bool letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        const bool digit = (c >= '0' && c <= '9');
        if(c == '_' || letter) {
            continue;
        }
        // Digits are allowed everywhere except in the first position.
        if(digit && i > 0) {
            continue;
        }
        return FAIL;
    }
    return SUCC;
}
//是否为常量
int _is_const(char* chrs)
{
//从开始字符进行分流
int len = strlen(chrs);
if(len < 1) {
//空字符情况排除
return FAIL;
}
if(is_const_char(chrs[0]) == SUCC && chrs[len-1] == chrs[0]) {
int i = 1;
for(; i < len; i++) {
if(chrs[i] == chrs[0] && chrs[i-1] != '\\') {
break;
}
}
if(
i == 2 //考虑到空字符串的可能
&& i != len //并且字符串终结符并不再结尾
) {
return ERROR;
}
return SUCC;
} else if(is_dig(chrs[0]) == SUCC) {
int i = 1;
for(; i < len; i++) {
if(is_dig(chrs[i]) != SUCC) {
break;
}
}
if(i != len) {
return ERROR;
}
return SUCC;
}
return FAIL;
}
// Tests whether `c` is a single-character operator, i.e. equals the first
// character of an entry of single_operator.
// Returns SUCC on a match, FAIL otherwise.
int is_op(char c)
{
    int pos = 0;
    while(pos < SINGLE_OP_COUNT && single_operator[pos][0] != c) {
        ++pos;
    }
    return (pos < SINGLE_OP_COUNT) ? SUCC : FAIL;
}
// Tests whether `chrs` is a two-character operator.
// Returns SUCC when `chrs` is exactly two characters long and equals an entry
// of multi_operator; FAIL otherwise.
int is_multi_op(char* chrs)
{
    if(strlen(chrs) != 2) {
        return FAIL;
    }
    int verdict = FAIL;
    for(int pos = 0; pos < MULTI_OP_COUNT && verdict == FAIL; ++pos) {
        if(strcmp(chrs, multi_operator[pos]) == 0) {
            verdict = SUCC;
        }
    }
    return verdict;
}
// Tests whether `chrs` is a reserved word (keyword).
// Returns SUCC when `chrs` equals an entry of static_key_words; FAIL for a
// null pointer, an empty string or any other word.
int is_keys(char* chrs)
{
    if(chrs == NULL || chrs[0] == '\0') {
        return FAIL;
    }
    for(int i = 0; i < KEYS_COUNT; i++) {
        if(strcmp(chrs, static_key_words[i]) == 0) {
            return SUCC;
        }
    }
    // No keyword matched; an explicit result is required here because the
    // caller compares the return value against SUCC.
    return FAIL;
}
// Tests whether `c` is a boundary character, i.e. equals the first character
// of an entry of limit_.
// Returns SUCC on a match, FAIL otherwise.
int is_limit(char c)
{
    int remaining = LIMIT_COUNT;
    while(remaining-- > 0) {
        if(limit_[remaining][0] == c) {
            return SUCC;
        }
    }
    return FAIL;
}
// Comment check for the "//" form. Per the original notes, a comment
//   1. starts a word or a sentence, and
//   2. takes all the text that follows it.
// This function only inspects the first character: it returns SUCC when
// `chrs` is non-empty and starts with '/', FAIL otherwise.
int is_anno(char* chrs)
{
    // A string whose first character is '/' is necessarily non-empty.
    return (chrs[0] == '/') ? SUCC : FAIL;
}
// Reads the text file `file_name` line by line.
//
// Each line (without its trailing '\n') is copied into a buffer allocated
// with new[] and appended to `output`; this function never frees the buffers.
// Reading stops at end of file or at the first read failure, so a stream that
// fails without reaching EOF cannot cause an endless loop, and no extra empty
// line is appended after the final newline.
//
// Returns FILE_NOT_EXIT when the file cannot be opened, SUCC otherwise.
int read_sour(char* file_name, vector<char*> &output)
{
    ifstream fin(file_name);
    if(!fin) {
        return FILE_NOT_EXIT;
    }
    string line;
    while(getline(fin, line)) {
        const size_t slen = line.length();
        char* line_ = new char[slen + 1];
        memcpy(line_, line.c_str(), slen);
        line_[slen] = '\0';
        output.push_back(line_);
    }
    // fin is closed by its destructor.
    return SUCC;
}
// Splits every line of `input` into words with division_str(), appending the
// words to `output`; the index of each line is passed as its line number.
// Returns FAIL when `input` is empty, SUCC otherwise.
int collect_words(vector<char*> &input, vector<char*> &output)
{
    const int total = input.size();
    if(total < 1) {
        return FAIL;
    }
    int row = 0;
    while(row < total) {
        division_str(input[row], output, row);
        ++row;
    }
    return SUCC;
}
//单词分类--词法分析
void word_analize(vector<char*> words, vector< pair<char*, char*> > &result)
{
//int op_type = 0; //0开始 1接收了一个字符 2接收了两个字符
char multi_op[3];
int size_ = words.size();
//printf("size: %d\n", size_);
int ret;
for(int i = 0; i < size_; i++) {
//printf("-%d\n", i);
ret = is_limit(words[i][0]);
if(ret == SUCC) {
printf("<%s, %s>\n", LIMIT, words[i]);
result.push_back(pair<char*, char*>(LIMIT, words[i]));
continue;
}
ret = is_keys(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", KEY, words[i]);
result.push_back(pair<char*, char*>(KEY, words[i]));
continue;
}
ret = is_op(words[i][0]);
if(ret == SUCC) { //第一层,单运算符
printf("<%s, %s>\n", OP, words[i]);
result.push_back(pair<char*, char*>(OP, words[i]));
continue;
}
ret = is_multi_op(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", OP, words[i]);
result.push_back(pair<char*, char*>(OP, words[i]));
continue;
}
ret = _is_const(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", CONST, words[i]);
result.push_back(pair<char*, char*>(CONST, words[i]));
continue;
} else if(ret == ERROR) {
printf("<%s, %s>\n", ERR, words[i]);
result.push_back(pair<char*, char*>(ERR, words[i]));
continue;
}
ret = is_tag(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", TAG, words[i]);
result.push_back(pair<char*, char*>(TAG, words[i]));
continue;
}
printf("<%s, %s>\n", ERR, words[i]);
result.push_back(pair<char*, char*>(ERR, words[i]));
}
}
// Debug helper: prints every entry of `words` together with its index.
void scan_words(vector<char*> words)
{
    const int total = words.size();
    int idx = 0;
    while(idx < total) {
        printf("line: %d, content: %s\n", idx, words[idx]);
        ++idx;
    }
}
// Program entry point: reads "sour.code", splits its lines into words and
// classifies them into the global `tokens` list.
// Returns -1 when the source file cannot be opened.
int main()
{
    static char FILE_NAME[32] = {"sour.code"};

    vector<char*> lines;
    if(read_sour(FILE_NAME, lines) == FILE_NOT_EXIT) {
        printf("file[%s] not exits\n", FILE_NAME);
        return -1;
    }

    vector<char*> words;
    collect_words(lines, words);
    word_analize(words, tokens);
    return 0;
}
// A small amount of analysis is given in the code comments~~ writing C++ is too tiring...