x86 NEMU PA1

NEMU PA1

https://nju-projectn.github.io/ics-pa-gitbook/ics2020/PA1.html

没用bash,用的fish

export NEMU_HOME=/mnt/hgfs/share/ics2020/nemu
export AM_HOME=/mnt/hgfs/share/ics2020/abstract-machine
source ~/.config/fish/config.fish

在 abstract-machine 里把 SIGSTKSZ 改为 8192

vscode 配置c_cpp_properties.json里一些预编译,来配置解析选项解锁正确的代码解析

可以用make -nB查看一下项目里原本定义的预编译宏

  • -n 仅显示将要执行的命令,而不实际执行它们。
  • -B 表示 “always-make”,它告诉 make 忽略时间戳检查,强制重新构建目标文件
grxer@Ubuntu22 /m/h/s/i/nemu (pa1)> make ARCH=x86-nemu ALL=dummy run -nB
gcc -D__DIFF_REF_KVM__ -O2 -MMD -Wall -Werror -ggdb3 -I./include -I./src/engine/interpreter -D__ENGINE_interpreter__ -D__ISA__=x86 -D__ISA_x86__ -D_ISA_H_=\"isa/x86.h\" -c -o build/obj-x86-interpreter/monitor/monitor.o src/monitor/monitor.c
echo + CC src/isa/x86/decode.c
//c_cpp_properties.json
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**"
],
"defines": [
"__DIFF_REF_KVM__",
"__ENGINE_interpreter__",
"__ISA__=x86",
"__ISA_x86__",
"_ISA_H_=\"isa/x86.h\""
],
"compilerPath": "/usr/bin/gcc",
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

额,后来改用 clangd 做解析。。。。 用 bear -- make 自动生成编译数据库(compile_commands.json),好用呢

或者,下面这个好像是专门给makefile用的

pip install compiledb
compiledb -n make ARCH=x86-nemu ALL=dummy
//-n 表示no-build

开始pa之旅:)

寄存器结构体

利用匿名union和匿名struct去写

/*
 * x86 register file.
 *
 * The outer anonymous union overlays two views of the same storage:
 *   - gpr[8]: indexable view; each element is itself a union exposing the
 *     register as a 32-bit value (_32), a 16-bit value (_16) or two bytes
 *     (_8[0], _8[1]).
 *   - an anonymous struct naming the eight registers eax..edi.
 * Because the views share storage, gpr[i]._32 aliases the i-th name in the
 * eax, ecx, edx, ebx, esp, ebp, esi, edi list (this relies on rtlreg_t being
 * 32 bits wide, as the comment below states).
 * NOTE(review): _16 and _8[] map onto the low half / low two bytes only on a
 * little-endian host -- confirm the supported hosts.
 */
typedef struct {

/* Do NOT change the order of the GPRs' definitions. */

/* In NEMU, rtlreg_t is exactly uint32_t. This makes RTL instructions
* in PA2 able to directly access these registers.
*/
union {
union {
uint32_t _32;
uint16_t _16;
uint8_t _8[2];
} gpr[8];
struct {
rtlreg_t eax, ecx, edx, ebx, esp, ebp, esi, edi;
};
};
/* Program counter; stored after (not overlaid with) the GPRs. */
vaddr_t pc;
} x86_CPU_state;

关于assert实现与封装

#define assert(cond) if (!(cond)) panic(...);

上面写法在一些特殊情况是有问题的

if (...) assert(xxx); // 上面的assert对么?
else ...

assert展开后,下面的else会和宏里的if配对(悬空else问题),而不是和外层的if配对,所以需要让宏展开后的内容成为一个不可拆分的整体

nemu里采用了下面的写法

/*
 * assert(cond): if cond evaluates to false, print the failing file and line
 * to stderr and terminate the process with exit status 1.
 *
 * The body is wrapped in do { ... } while (0) so the expansion forms a single
 * statement: `if (x) assert(y); else ...` keeps the else attached to the outer
 * if instead of the if inside the macro.
 * Note that the message is written without a trailing newline.
 */
#define assert(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "Fail @ %s:%d", __FILE__, __LINE__); \
exit(1); \
} \
} while (0)

GNU提供了C语言扩展({ ... })(语句表达式)来把这些语句当作一个整体,但这不是C标准

#define assert(cond) ({ ... })

去看了一下glibc2.35的源码,依旧存在#define assert(cond) if (!(cond)) panic(...);的写法,但是基本上后面都没有else,但或许是个隐患

info r

/*
 * Handler for the `info` command.
 *
 * args: text following the command name, or NULL when nothing was typed
 *       after it. Only the first character is examined: 'r' dumps the
 *       registers through isa_reg_display().
 * Returns 0 in every case.
 */
static int cmd_info(char *args) {
  /* `info` with no subcommand: strncmp() on NULL would be undefined. */
  if (args == NULL) {
    puts("Usage: info r");
    return 0;
  }
  if (0 == strncmp("r", args, 1)) {
    isa_reg_display();
  }
  return 0;
}
/*
 * Print the eight general-purpose registers (name:0xvalue, in bold red),
 * breaking the line after the fourth one, then print pc in bold yellow.
 */
void isa_reg_display() {
  int idx = 0;
  while (idx < 8) {
    printf("\033[1;31m%s:0x%x\t\033[0m ", regsl[idx], cpu.gpr[idx]._32);
    /* Four registers per row. */
    if (idx == 3) {
      putchar('\n');
    }
    idx++;
  }
  printf("\033[1;33m\n%s:0x%x\n\033[0m", "pc", cpu.pc);
}

si N

/*
 * Handler for `si [N]`: execute N instructions, or a single instruction when
 * no argument was supplied (args == NULL). N is parsed as a base-10 unsigned
 * number. Always returns 0.
 */
static int cmd_si(char *args) {
  if (args == NULL) {
    cpu_exec(1);
    return 0;
  }
  cpu_exec(strtoul(args, NULL, 10));
  return 0;
}

x N EXPR

/*
 * Handler for `x N ADDR`: dump N consecutive 4-byte words starting at the
 * hexadecimal address ADDR, four words per row, each row prefixed with the
 * address of its first word.
 *
 * The two operands are fetched with strtok(NULL, " "), i.e. this continues a
 * tokenisation that the caller must already have started on the command
 * line; `args` itself is not read.
 * Prints a usage line and returns when an operand is missing or N is not a
 * positive number. Always returns 0.
 */
static int cmd_x(char *args) {
  (void)args;

  /* Fetch both operands first; strtok() returns NULL once input runs out. */
  char *count_str = strtok(NULL, " ");
  char *addr_str = strtok(NULL, " ");
  if (count_str == NULL || addr_str == NULL) {
    puts("Usage: x N ADDR");
    return 0;
  }

  char *count_end = NULL;
  long times = strtol(count_str, &count_end, 10);
  if (count_end == count_str || times <= 0) {
    puts("Usage: x N ADDR");
    return 0;
  }

  vaddr_t address = (vaddr_t)strtoul(addr_str, NULL, 16);
  for (long i = 0; i < times; i++) {
    if (0 == i % 4) {
      printf("\033[1;34m%08x: \033[0m", address);
    }
    printf("0x%08x ", vaddr_read(address, 4));
    address += 4;
    /* Close the row after every fourth word. */
    if (0 == (i + 1) % 4) {
      putchar('\n');
    }
  }
  /* Terminate a final, partially filled row. */
  if (times % 4) {
    putchar('\n');
  }
  return 0;
}

p 表达式求值

/*
 * Token kinds produced by the expression lexer.
 * The operator/operand kinds are numbered consecutively from 1; TK_NOTYPE
 * (used for whitespace, which is discarded) is kept at 256.
 */
enum {
  /* TODO: Add more token types */
  TK_EQ = 1,  /* ==                      */
  TK_DEC,     /* decimal literal         */
  TK_ADD,     /* +                       */
  TK_SUB,     /* -                       */
  TK_MUL,     /* *                       */
  TK_DIV,     /* /                       */
  TK_BRAL,    /* (                       */
  TK_BRAR,    /* )                       */
  TK_DEREF,   /* unary * (dereference)   */
  TK_HEX,     /* hexadecimal literal     */
  TK_REG,     /* $register               */
  TK_NOTYPE = 256
};

/*
 * Lexer rule table: each entry pairs a regular expression with the token
 * type it produces. The order of the entries is significant (see the note
 * on the hexadecimal rule).
 */
static struct rule {
char *regex;
int token_type;
} rules[] = {

/* TODO: Add more rules.
* Pay attention to the precedence level of different rules.
*/
{"0[xX][0-9a-fA-F]+", TK_HEX},// must precede the decimal rule, otherwise the decimal rule would consume the leading 0 of "0x"
{"[0-9]+", TK_DEC}, {" +", TK_NOTYPE}, // spaces
{"\\+", TK_ADD}, // plus: the doubled backslash in the C literal yields one backslash, which escapes '+' for the regex engine
{"-", TK_SUB}, {"\\*", TK_MUL}, {"/", TK_DIV},
{"\\(", TK_BRAL}, {"\\)", TK_BRAR}, {"==", TK_EQ}, // equal
{"\\$[a-z]+", TK_REG}
};
/* TODO: Now a new token is recognized with rules[i]. Add codes
* to record the token in the array `tokens'. For certain types
* of tokens, some extra actions should be performed.
*/

/*
 * Whitespace (TK_NOTYPE) is dropped. Every other recognised type is appended
 * to tokens[]: the type is recorded and the matched text is copied into .str
 * and NUL-terminated, then nr_token is advanced.
 * NOTE(review): substr_len is not checked against the capacity of
 * tokens[nr_token].str, and nr_token is not checked against the size of
 * tokens[] in this excerpt -- confirm both bounds are enforced.
 */
switch (rules[i].token_type) {
case TK_NOTYPE:
break;
case TK_EQ:
case TK_DEC:
case TK_ADD:
case TK_SUB:
case TK_MUL:
case TK_DIV:
case TK_BRAL:
case TK_BRAR:
case TK_HEX:
case TK_REG:
tokens[nr_token].type = rules[i].token_type;
strncpy(tokens[nr_token].str, substr_start, substr_len);
tokens[nr_token].str[substr_len] = '\0';
nr_token++;
break;
/*
 * Report whether tokens[start..end] is enclosed by one matching pair of
 * parentheses, i.e. the '(' at `start` is closed by the ')' at `end` and not
 * earlier. Returns false when the range does not begin with '(' and end
 * with ')', when the opening parenthesis closes before `end`, or when the
 * nesting depth is non-zero at the end of the range.
 */
static bool check_parentheses(int start, int end) {
  if (tokens[start].type != TK_BRAL || tokens[end].type != TK_BRAR) {
    return false;
  }

  int depth = 0;
  for (int pos = start; pos <= end; pos++) {
    switch (tokens[pos].type) {
      case TK_BRAL:
        depth++;
        break;
      case TK_BRAR:
        depth--;
        /* Depth reached zero before the last token: the outer pair does
         * not span the whole range, e.g. "(1)+(2)". */
        if (depth == 0 && pos != end) {
          return false;
        }
        break;
      default:
        break;
    }
  }
  return depth == 0;
}
/*
 * Check that the parentheses in tokens[start..end] are balanced.
 *
 * Returns false as soon as a ')' appears without a matching '(' before it,
 * and also when some '(' is still open at the end of the range (previously
 * an input such as "(1+2" was reported as valid).
 */
static bool check_exp_is_valid(int start, int end) {
  int depth = 0;
  for (int i = start; i <= end; i++) {
    if (tokens[i].type == TK_BRAL) {
      depth++;
    } else if (tokens[i].type == TK_BRAR) {
      depth--;
      if (depth < 0) {
        return false;
      }
    }
  }
  /* Every opened parenthesis must have been closed. */
  return depth == 0;
}
//找出主运算符
int principal_operator(int start,int end) {
int bracket=0;
int operator= 0;
bool is_add_or_sub=false;
for (int i=start; i<=end; i++) {
if (tokens[i].type == TK_BRAL) {
bracket++;
continue;
}
if(tokens[i].type==TK_BRAR){
bracket--;
continue;
}
if (0==bracket) {
if (tokens[i].type == TK_ADD || tokens[i].type == TK_SUB) {
operator=i;
is_add_or_sub=true;
} else if (false == is_add_or_sub &&
(tokens[i].type == TK_DIV || tokens[i].type == TK_MUL||tokens[i].type ==TK_DEREF||tokens[i].type==TK_REG)) {//应该把优先级最高的放后面
operator=i;
}
}
}
return operator;
}
static bool answer_valid=true;
//递归求值
static int32_t eval(int start, int end) {
if (start == end) {
if(tokens[start].type==TK_DEC)
return atoi(tokens[start].str);
else if(tokens[start].type==TK_HEX)
return strtol(tokens[start].str,NULL,16);
else if (tokens[start].type==TK_REG) {
int res= isa_reg_str2val(tokens[start].str,&answer_valid);
if (!answer_valid) {
printf("reg exp error");
}
return res;
}
} else if (true == check_parentheses(start, end)) {
return eval(start+1,end-1);
} else {
if (check_exp_is_valid(start,end)==false) {//check_parentheses返回false有两种情况,一种是没有被括号包围但表表达式正确,另一种是表达式就不对
answer_valid=false;
return false;//这里不会有太多无意义的性能消耗,第一次就会被检测出来
}
int operator=principal_operator(start, end);
if (tokens[operator].type==TK_DEREF) {
return vaddr_read(strtoul(tokens[operator+1].str,NULL,16),4);
}

int32_t sum1=eval(start,operator-1);
int32_t sum2=eval(operator+1,end);
switch (tokens[operator].type) {
case TK_ADD:
return sum1 + sum2;
case TK_SUB:
return sum1 - sum2;
case TK_MUL:
return sum1 * sum2;
case TK_DIV:
return sum1 / sum2;
}
}
return 0;
}
int32_t expr(char *e, bool *success) {
answer_valid=true;
if (!make_token(e)) {
*success = false;
return 0;
}
for (int i=0; i<=nr_token-1; i++) {
if (tokens[i].type == TK_MUL &&
(i == 0 ||
(tokens[i - 1].type != TK_DEC || tokens[i - 1].type != TK_BRAL ||
tokens[i - 1].type != TK_HEX))) {
tokens[i].type =TK_DEREF;
}
}
/* TODO: Insert codes to evaluate the expression. */
int32_t answer=eval(0,nr_token-1);
*success=answer_valid;
return answer;
}

/*
 * Handler for `p EXPR`: evaluate EXPR and print it as
 * "<expr>: <decimal> 0x<hex>".
 *
 * Returns 0 on success; prints "Error expression" and returns 1 when no
 * expression was given (args == NULL) or it could not be evaluated.
 */
static int cmd_p(char *args) {
  /* `p` with nothing after it: there is no string to evaluate or print. */
  if (args == NULL) {
    puts("Error expression");
    return 1;
  }

  bool flag = true;
  int32_t answer = expr(args, &flag);
  if (false == flag) {
    puts("Error expression");
    return 1;
  }
  printf("%s: %d 0x%x\n", args, answer, answer);
  return 0;
}

监视点

/*
 * One watchpoint slot. Slots live in a fixed pool and are linked through
 * `next` into either the list of active watchpoints or the free list.
 */
typedef struct watchpoint {
/* Slot number; init_wp_pool() sets it to the slot's index in the pool. */
int NO;
/* Next slot in whichever list (active or free) this slot is on. */
struct watchpoint *next;

/* TODO: Add more members if necessary */
/* Value of the watched expression recorded when the watchpoint was set. */
sword_t value;
/* Text of the watched expression, stored in a fixed 32-byte buffer. */
char expr[32];
} WP;
/* Take a slot from the free list, record value/expr in it and activate it. */
WP* new_wp(int32_t value,char *expr);
/* Deactivate wp and hand its slot back to the free list. */
void free_wp(WP *wp);
/* Re-evaluate the active watchpoints; true when no value has changed. */
bool check_wp();
/* Print the active watchpoints. */
void dispaly_wp();
/* Map the decimal slot number in s to its pool slot. */
WP* get_wp(char* s);
void init_wp_pool() {
int i;
for (i = 0; i < NR_WP; i ++) {
wp_pool[i].NO = i;
wp_pool[i].next = &wp_pool[i + 1];
memset(wp_pool[i].expr, 0, 32);
}
wp_pool[NR_WP - 1].next = NULL;

head = NULL;
free_ = wp_pool;
}

/* TODO: Implement the functionality of watchpoint */
WP* new_wp(int32_t value,char *expr) {
if (NULL==free_) {
puts("Dont hava free watch pointer");
exit(-1);
}
WP* temp = free_;
free_=free_->next;
temp->next = head;
head = temp;
head->value=value;
memcpy(head->expr,expr,strlen(expr));
return head;
}
/*
 * Deactivate a watchpoint and return its slot to the free list.
 *
 * wp: slot to release. When wp is NULL or is not on the active list (for
 *     example because it was never set or was already deleted) a message is
 *     printed and nothing is changed; previously the latter case
 *     dereferenced a NULL pointer.
 */
void free_wp(WP* wp) {
  if (!wp) {
    puts("dont have this watch pointer");
    return;
  }

  /* Find the link (head or some node's next) that points at wp. */
  WP **link = &head;
  while (*link != NULL && *link != wp) {
    link = &(*link)->next;
  }
  if (*link == NULL) {
    puts("dont have this watch pointer");
    return;
  }

  /* Unlink from the active list, then push onto the free list. */
  *link = wp->next;
  wp->next = free_;
  free_ = wp;
  wp->value = 0;
  memset(wp->expr, 0, sizeof wp->expr);
}
/*
 * Re-evaluate every active watchpoint and report the ones whose value
 * differs from the recorded one.
 *
 * For each changed watchpoint the old and new values are printed and the
 * recorded value is refreshed, so the same change is reported only once.
 * A watchpoint whose expression cannot be evaluated is skipped.
 * Returns true when no watchpoint changed, false when at least one did.
 */
bool check_wp() {
  bool equal = true;
  for (WP *node = head; node != NULL; node = node->next) {
    bool ok = true;
    word_t new_value = expr(node->expr, &ok);
    if (!ok) {
      continue;
    }
    if (new_value != node->value) {
      printf("Num:%d Expr:%s at:%x\n", node->NO, node->expr, node->value);
      printf("old value = 0x%x\nnew value = 0x%x\n", node->value, new_value);
      node->value = new_value;
      equal = false;
    }
  }
  return equal;
}
/*
 * List the active watchpoints: a header line followed by one line per
 * watchpoint holding its number and recorded value in hexadecimal.
 * Prints "No watchpoints." when the active list is empty.
 */
void dispaly_wp() {
  if (head == NULL) {
    puts("No watchpoints.");
    return;
  }

  puts("Num\tWhat\t");
  for (WP *node = head; node != NULL; node = node->next) {
    printf("%d\t0x%x\t\n", node->NO, node->value);
  }
}
WP* get_wp(char* s) {
return (wp_pool + atoi(s)); // wp_pool为WP*类型,所以他+n=(char *)wp_pool+sizeof(WP)
}
/*
 * Handler for `watch EXPR`: set a watchpoint on EXPR.
 *
 * Only available when built with DEBUG; otherwise a notice is printed.
 * Returns 0 on success (and in non-DEBUG builds), 1 when the expression is
 * missing, too long to store, or cannot be evaluated.
 */
static int cmd_w(char *args) {
#ifdef DEBUG
  if (args == NULL) {
    puts("Error expression in watch");
    return 1;
  }
  /* The expression text is kept in WP.expr, a fixed-size buffer; refuse
   * text that would not fit together with its terminator. */
  if (strlen(args) >= sizeof(((WP *)NULL)->expr)) {
    puts("Error expression in watch");
    return 1;
  }

  /* Evaluate before allocating so that a bad expression does not consume a
   * watchpoint slot. */
  bool flag = false;
  int32_t value = expr(args, &flag);
  if (false == flag) {
    puts("Error expression in watch");
    return 1;
  }

  WP *temp = new_wp(value, args);
  printf("watch pointer at 0x%x\n", temp->value);
  return 0;
#else
  (void)args;
  puts("NOT ON DEBUG PATTERN");
  return 0;
#endif
}

cpu-exec.c

#ifdef DEBUG
asm_print(this_pc, seq_pc - this_pc, n < MAX_INSTR_TO_PRINT);

/* TODO: check watchpoints here. */
/* check_wp() returns true while every watched expression still has its
 * recorded value, so execution must stop when it returns false. */
if (!check_wp()) {
nemu_state.state=NEMU_STOP;
return;
}
#endif

info w

/*
 * Handler for the `info` command.
 *
 * args: text following the command name, or NULL when nothing was typed
 *       after it. Only the first character is examined:
 *         'r' - dump the registers (isa_reg_display)
 *         'w' - list the watchpoints (dispaly_wp)
 * A missing subcommand prints a usage line. Returns 0 in every case.
 */
static int cmd_info(char *args) {
  /* `info` with no subcommand: strncmp() on NULL would be undefined. */
  if (args == NULL) {
    puts("Usage: info r|w");
    return 0;
  }
  if (0 == strncmp("r", args, 1)) {
    isa_reg_display();
  }
  if (0 == strncmp("w", args, 1)) {
    dispaly_wp();
  }
  return 0;
}

d NUM

/*
 * Handler for `d NUM`: delete watchpoint number NUM.
 * Prints a usage line when NUM is missing. Always returns 0.
 */
static int cmd_d(char *args) {
  /* `d` with nothing after it: there is no number to look up. */
  if (args == NULL) {
    puts("Usage: d NUM");
    return 0;
  }
  free_wp(get_wp(args));
  return 0;
}

调试器所有command

/*
 * Debugger command table: command name, the help text shown for it, and the
 * function that handles it. Each handler receives the text that follows the
 * command name.
 */
static struct {
  char *name;
  char *description;
  int (*handler)(char *);
} cmd_table[] = {
    {"help", "Display informations about all supported commands", cmd_help},
    {"c", "Continue the execution of the program", cmd_c},
    {"q", "Exit NEMU", cmd_q},
    {"si", "Single Instruction", cmd_si},
    {"info", "info reg or watch", cmd_info},
    {"x", "show memory info", cmd_x},
    {"p", "print the value of an expression", cmd_p},
    {"watch", "watch a point", cmd_w},
    {"d", "delete a watch pointer", cmd_d}
    /* TODO: Add more commands */

};