网上对虚拟机的解释很多,其实本质就一句话
虚拟机就是机器语言解释器
我们今天要实现汇编语言解释器,下一次再加上ndisasm反汇编器就是真正虚拟机了
注:这里的虚拟机指的是VMware一类的,而不是JVM,python一样的高级语言解释器
上代码
#include <graphics.h>
#include <conio.h>
#include <windows.h>
#include <commdlg.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <unordered_map>
#include <algorithm>// 寄存器声明
unsigned char al = 0, ah = 0, bl = 0, bh = 0, cl = 0, ch = 0, dl = 0, dh = 0;
unsigned short ax = 0, bx = 0, cx = 0, dx = 0, sp = 0x8000, bp = 0;
unsigned int org = 0, end_times = 0, end_AA55 = 0;
bool ZF = false, CF = false, SF = false; // 标志寄存器// 标签和指令指针
std::unordered_map<std::string, size_t> labels;
size_t current_line = 0;
size_t new_current_line;
std::vector<std::string> program_lines;// 内存模拟
std::vector<unsigned char> memory(0x10000, 0); // 64KB内存// 图形输出参数
int textX = 0;
int textY = 48;
const int CHAR_WIDTH = 8;
const int LINE_HEIGHT = 16;
bool graphicsInitialized = false;// 指令解析错误枚举
enum class InstructionError {INVALID_OPCODE,INVALID_OPERAND,LABEL_NOT_FOUND,UNKNOWN_INTERRUPT,OTHER_ERROR
};// 输出错误信息到终端
void printError(const InstructionError& error, const std::string& details = "") {std::cerr << "ERROR: ";switch (error) {case InstructionError::INVALID_OPCODE:std::cerr << "无效的操作码";break;case InstructionError::INVALID_OPERAND:std::cerr << "无效的操作数";break;case InstructionError::LABEL_NOT_FOUND:std::cerr << "标签未找到";break;case InstructionError::UNKNOWN_INTERRUPT:std::cerr << "未知的中断号";break;case InstructionError::OTHER_ERROR:std::cerr << "其他错误";break;}if (!details.empty()) {std::cerr << " - " << details;}std::cerr << std::endl;
}int parseImmediate(const std::string& immediateStr) {std::string result;bool inQuote = false;char quoteChar = '\0';for (size_t i = 0; i < immediateStr.size(); ++i) {const char c = immediateStr[i];if (c == '\'' || c == '"') {if (!inQuote) {inQuote = true;quoteChar = c;result += c;} else if (c == quoteChar) {inQuote = false;result += c;} else {result += c;}} else if (inQuote) {// 直接将引号内的字符添加到结果中,包括空格result += c;} else if (!std::isspace(c)) {// 非空格且不在引号内,将字符添加到结果中result += c;} else if (i > 0 &&!std::isspace(result.back())) {// 如果前一个字符不是空格,添加当前字符以保留中间的空格result += c;}}// 去除结果字符串两端可能残留的空格while (!result.empty() && std::isspace(result.front())) {result.erase(result.begin());}while (!result.empty() && std::isspace(result.back())) {result.erase(result.length() - 1);}if (result.empty()) return 0;if (result.length() == 3 && result[0] == '\'' && result[2] == '\'') {return static_cast<int>(result[1]);}else if (result.find("0x") == 0) {try {return std::stoi(result.substr(2), nullptr, 16);} catch (const std::invalid_argument& e) {throw std::invalid_argument("无效的十六进制立即数:" + result);} catch (const std::out_of_range& e) {throw std::out_of_range("十六进制立即数超出范围:" + result);}}else if (result.back() == 'h') {try {return std::stoi(result.substr(0, result.length() - 1), nullptr, 16);} catch (const std::invalid_argument& e) {throw std::invalid_argument("无效的十六进制立即数(以h结尾):" + result);} catch (const std::out_of_range& e) {throw std::out_of_range("十六进制立即数(以h结尾)超出范围:" + result);}}else {try {return std::stoi(result);} catch (const std::invalid_argument& e) {throw std::invalid_argument("无效的立即数:" + result);} catch (const std::out_of_range& e) {throw std::out_of_range("立即数超出范围:" + result);}}
}std::unordered_map<std::string, unsigned char*>& createRegister8BitMap() {static std::unordered_map<std::string, unsigned char*> map = {{"al", &al}, {"ah", &ah}, {"bl", &bl}, {"bh", &bh},{"cl", &cl}, {"ch", &ch}, {"dl", &dl}, {"dh", &dh}};return map;
}std::unordered_map<std::string, unsigned short*>& createRegister16BitMap() {static std::unordered_map<std::string, unsigned short*> map = {{"ax", &ax}, {"bx", &bx}, {"cx", &cx}, {"dx", &dx},{"sp", &sp}, {"bp", &bp}};return map;
}void UpdateTextPosition() {textX += CHAR_WIDTH;if (textX > 620) {textX = 20;textY += LINE_HEIGHT;}if (textY + LINE_HEIGHT > 480) {cleardevice();textX = 0;textY = 0;}
}void MovInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, dest, src;iss >> opcode >> dest >> src;auto& reg8 = createRegister8BitMap();auto& reg16 = createRegister16BitMap();auto parseOperand = [&](const std::string& op) -> int {if (reg8.count(op)) return *reg8[op];if (reg16.count(op)) return *reg16[op];return parseImmediate(op);};int value = parseOperand(src);if (reg8.count(dest)) {*reg8[dest] = static_cast<unsigned char>(value);}else if (reg16.count(dest)) {*reg16[dest] = static_cast<unsigned short>(value);}
}void CmpInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, op1, op2;iss >> opcode >> op1 >> op2;auto& reg8 = createRegister8BitMap();auto& reg16 = createRegister16BitMap();auto parseOperand = [&](const std::string& op) -> int {if (reg8.count(op)) return *reg8[op];if (reg16.count(op)) return *reg16[op];return parseImmediate(op);};int val1 = parseOperand(op1);int val2 = parseOperand(op2);int result = val1 - val2;ZF = (result == 0);SF = (result < 0);CF = (static_cast<unsigned>(val1) < static_cast<unsigned>(val2));
}void JmpInstruction(const std::string& line) {std::istringstream iss(line);std::string opcode, label;iss >> opcode >> label;if (labels.count(label)) {new_current_line = labels[label];} else {printError(InstructionError::LABEL_NOT_FOUND, "JMP指令中的标签: " + label);}
}void JeInstruction(const std::string& line) {std::istringstream iss(line);std::string opcode, label;iss >> opcode >> label;if (ZF) {if (labels.count(label)) {new_current_line = labels[label];} else {printError(InstructionError::LABEL_NOT_FOUND, "JE指令中的标签: " + label);}} else {new_current_line = current_line + 1;}
}void JneInstruction(const std::string& line) {std::istringstream iss(line);std::string opcode, label;iss >> opcode >> label;if (!ZF) {if (labels.count(label)) {new_current_line = labels[label];} else {printError(InstructionError::LABEL_NOT_FOUND, "JNE指令中的标签: " + label);}} else {new_current_line = current_line + 1;}
}void PushInstruction(const std::string& line) {std::istringstream iss(line);std::string opcode, src;iss >> opcode >> src;auto& reg16 = createRegister16BitMap();unsigned short value = reg16.count(src)? *reg16[src] : parseImmediate(src);sp -= 2;memory[sp] = value & 0xFF;memory[sp + 1] = (value >> 8) & 0xFF;
}void PopInstruction(const std::string& line) {std::istringstream iss(line);std::string opcode, dest;iss >> opcode >> dest;auto& reg16 = createRegister16BitMap();if (reg16.count(dest)) {*reg16[dest] = memory[sp] | (memory[sp + 1] << 8);sp += 2;}
}void XorInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, dest, src;iss >> opcode >> dest >> src;auto& reg8 = createRegister8BitMap();auto& reg16 = createRegister16BitMap();auto parseOperand = [&](const std::string& op) -> int {if (reg8.count(op)) return *reg8[op];if (reg16.count(op)) return *reg16[op];return parseImmediate(op);};int val1 = parseOperand(dest);int val2 = parseOperand(src);int result = val1 ^ val2;if (reg8.count(dest)) {*reg8[dest] = static_cast<unsigned char>(result);}else if (reg16.count(dest)) {*reg16[dest] = static_cast<unsigned short>(result);}ZF = (result == 0);SF = (result < 0);CF = false;
}void PreprocessLabels() {for (size_t i = 0; i < program_lines.size(); ++i) {std::string line = program_lines[i];size_t colonPos = line.find(':');if (colonPos!= std::string::npos) {std::string label = line.substr(0, colonPos);labels[label] = i;program_lines[i] = line.substr(colonPos + 1);std::cout << "Label found: " << label << " at line " << i << std::endl;}}
}void IntInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, interrupt;iss >> opcode >> interrupt;if (interrupt == "0x10" || interrupt == "10h") {if (ah == 0x0E) {if (!graphicsInitialized) {initgraph(640, 480);setbkcolor(BLACK);cleardevice();settextcolor(CYAN);settextstyle(17, 0, _T("Courier New Bold"));graphicsInitialized = true;outtextxy(textX, 0, "VMwork BIOS (PCI)");outtextxy(textX, 16, "This VGA/VBE BIOS is released under the GNU LGPL");settextcolor(RGB(192, 192, 192));}// 处理特殊字符if (al == 0x0D) {outtextxy(textX, textY, " ");textY += LINE_HEIGHT;}else if (al == 0x0A) {outtextxy(textX, textY, " ");textX = 0;}else {char str[2] = { static_cast<char>(al) };outtextxy(textX, textY, " ");outtextxy(textX, textY, str);UpdateTextPosition();outtextxy(textX, textY, "|");}}if (ah == 0x02 && bh == 0) {textX = 0;textY = 0;}if (ax == 0x0600 && bx == 0x0700 && cx == 0 && dx == 0x184f) {cleardevice();}}else if (interrupt == "0x16" || interrupt == "16h") {if (ah == 0) {while (true) {if (_kbhit()) {al = _getch();break;}}}}else {printError(InstructionError::UNKNOWN_INTERRUPT, "未知的中断号: " + interrupt);}
}void CallInstruction(const std::string& line) {std::vector<std::string> tokens;std::istringstream iss(line);std::string token;while (iss >> token) {tokens.push_back(token);}if (tokens.size() < 2) {printError(InstructionError::INVALID_OPERAND, "CALL指令缺少操作数");return;}std::string label = tokens.back();if (labels.count(label)) {// 压入返回地址(当前行号的下一条指令)unsigned short return_line = current_line + 1;sp -= 2;memory[sp] = return_line & 0xFF;memory[sp + 1] = (return_line >> 8) & 0xFF;new_current_line = labels[label];} else {printError(InstructionError::LABEL_NOT_FOUND, "CALL指令中的标签: " + label);}
}void OrgInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, interrupt;iss >> opcode >> interrupt;if (interrupt == "0x7c00" || interrupt == "0x7C00") {org = 0x7c00;} else {printError(InstructionError::INVALID_OPERAND, "ORG指令的操作数无效: " + interrupt);}
}void TimesInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, interrupt;iss >> opcode >> interrupt;if (interrupt == "510-($-$$) db 0" || interrupt == "510-($-$$)") {end_times = 1;} else {printError(InstructionError::INVALID_OPERAND, "TIMES指令的操作数无效: " + interrupt);}
}void DwInstruction(const std::string& line) {std::string processedLine = line;std::replace(processedLine.begin(), processedLine.end(), ',', ' ');std::istringstream iss(processedLine);std::string opcode, interrupt;iss >> opcode >> interrupt;if (interrupt == "0xAA55" || interrupt == "0xaa55") {end_AA55 = 1;} else {printError(InstructionError::INVALID_OPERAND, "DW指令的操作数无效: " + interrupt);}
}int main(int argc, char* argv[]) {std::ifstream file;if (argc == 1) {OPENFILENAMEA ofn;char szFileName[MAX_PATH] = "";ZeroMemory(&ofn, sizeof(ofn));ofn.lStructSize = sizeof(ofn);ofn.hwndOwner = NULL;ofn.lpstrFilter = "所有文件 (*.*)\0*.*\0";ofn.lpstrFile = szFileName;ofn.nMaxFile = MAX_PATH;ofn.lpstrTitle = "请选择一个文件";ofn.Flags = OFN_DONTADDTORECENT | OFN_FILEMUSTEXIST;if (GetOpenFileNameA(&ofn)) {file.open(szFileName);}else {if (graphicsInitialized) {closegraph();}return 0;}}else {file.open(argv[1]);}if (!file.is_open()) {std::cerr << "无法打开文件" << std::endl;if (graphicsInitialized) {closegraph();}return 1;}// 先将文件内容读取到 program_lines 中std::string line;while (std::getline(file, line)) {program_lines.push_back(line);}file.close();for (auto& progLine : program_lines) {for (size_t i = 0; i < progLine.size(); ++i) {if (i < progLine.size() - 2 && progLine[i] == '\'' && progLine[i + 1] ==' ' && progLine[i + 2] == '\'') {progLine[i] = static_cast<char>(0x20);progLine.erase(i + 1, 2); // 移除后面的空格和单引号}}}PreprocessLabels();// 重置指令指针和新的指令指针new_current_line = current_line;while (current_line < program_lines.size()) {std::istringstream iss(program_lines[current_line]);std::string opcode;iss >> opcode;if (opcode == "mov") MovInstruction(program_lines[current_line]);else if (opcode == "int") IntInstruction(program_lines[current_line]);else if (opcode == "org") OrgInstruction(program_lines[current_line]);else if (opcode == "times") TimesInstruction(program_lines[current_line]);else if (opcode == "dw") DwInstruction(program_lines[current_line]);else if (opcode == "cmp") CmpInstruction(program_lines[current_line]);else if (opcode == "jmp") JmpInstruction(program_lines[current_line]);else if (opcode == "je" || opcode == "jz") JeInstruction(program_lines[current_line]);else if (opcode == "jne" || opcode == "jnz") JneInstruction(program_lines[current_line]);else if (opcode == "push") PushInstruction(program_lines[current_line]);else if (opcode == "pop") PopInstruction(program_lines[current_line]);else if (opcode == "xor") XorInstruction(program_lines[current_line]);else if (opcode == "call") CallInstruction(program_lines[current_line]);else if (opcode != "\n" || opcode != "\t") std::cout << "warning:未识别的指令:" << opcode << "\n";if (opcode == "jmp" || opcode == "je" || opcode == "jne") {current_line = new_current_line;}else {current_line++;}new_current_line = current_line + 1; }
/*if (org!= 0x7c00 || end_times == 0 || end_AA55 == 0) {closegraph();}
*/if (graphicsInitialized) {_getch();//closegraph();}return 0;
}
编译:
g++ main.cpp -o VMwork -std=c++11 -leasyx -lcomdlg32
os.asm:
org 0x7c00start:mov bp, 0x8000mov sp, bp.print:mov ah, 0x0Emov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, 'H'int 0x10mov al, 'a'int 0x10mov al, 'n'int 0x10mov al, 'O'int 0x10mov al, 'S'int 0x10mov al, '>'int 0x10mov al, '>'int 0x10.wait_input:mov ah, 0x00int 0x16cmp al, 'c'je .check_input_c1cmp al, 'e'je .check_input_ecmp al, 'p'je .check_input_6.pycmp al, '.'je .check_input_pycmp al, 'l'je .check_input_lcmp al, 0x0Dje .bad_inputmov ah, 0x0Eint 0x10jmp .wait_input.check_input_l:mov ah, 0x0Eint 0x10mov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, 's'jne .wait_inputmov ah, 0x00int 0x16cmp al, 0x0Djne .wait_inputmov ah, 0x0Emov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, 'p'int 0x10mov al, ' 'int 0x10mov al, ' 'int 0x10mov al, 'P'int 0x10mov al, 'Y'int 0x10mov al, ' 'int 0x10mov al, ' 'int 0x10mov al, '1'int 0x10mov al, '2'int 0x10mov al, 'B'int 0x10mov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, 'o'int 0x10mov al, 's'int 0x10mov al, ' 'int 0x10mov al, 'S'int 0x10mov al, 'Y'int 0x10mov al, 'S'int 0x10mov al, ' 'int 0x10mov al, '1'int 0x10mov al, '4'int 0x10mov al, '4'int 0x10mov al, '0'int 0x10mov al, 'K'int 0x10mov al, 'B'int 0x10jmp .print.check_input_py:mov ah, 0x0Eint 0x10mov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, '\'jne .wait_inputmov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, 'p'jne .wait_inputmov ah, 0x00int 0x16cmp al, 0x0Djne .wait_inputmov ah, 0x0Emov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, '6'int 0x10mov al, '6'int 0x10mov al, '6'int 0x10jmp .print.check_input_e:mov ah, 0x0Eint 0x10mov ah, 0x00int 0x16cmp al, 0x0Djne .wait_inputMOV AL,0x13MOV AH,0x00INT 0x10jmp .done.check_input_c1:mov ah, 0x0Eint 0x10mov ah, 0x00int 0x16cmp al, 0x0Djne .wait_input
.bad_input:mov ah, 0x0Emov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, 'b'int 0x10mov al, 'a'int 0x10mov al, 'd'int 0x10jmp .print
.done:retjmp .done.check_input_6.py:mov ah, 0x0Eint 0x10mov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, '.'jne .wait_inputmov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, 'p'jne .wait_inputmov ah, 0x00int 0x16mov ah, 0x0Eint 0x10cmp al, 'y'jne .wait_inputmov ah, 0x00int 0x16cmp al, 0x0Djne .wait_inputmov ah, 0x0Emov al, 0x0Dint 0x10mov al, 0x0Aint 0x10mov al, 'p'int 0x10mov al, 'r'int 0x10mov al, 'i'int 0x10mov al, 'n'int 0x10mov al, 't'int 0x10mov al, '('int 0x10mov al, '"'int 0x10mov al, '6'int 0x10mov al, '6'int 0x10mov al, '6'int 0x10mov al, '"'int 0x10mov al, ')'int 0x10jmp .printtimes 510-($-$$) db 0
dw 0xAA55
双击VMwork.exe,选择os.asm
运行非常成功
我们已经能解析整个引导扇区了,操作系统内核nasm代码甚至不用修改就能运行
注意,输入是输入到终端
只要
nasm os.asm -o os.img
就可以直接VMware/bochs/qemu运行os.img了