国土资源网站建设方案/百度免费推广怎么操作
重点难点:
1. 理解二进制数据在内存中如何连续存储,以及如何对其进行读写。
2. 理解Table数据结构和Page结构,以及对应的select和insert操作。
按照测试驱动开发的思路,我们先看想要得到的结果
~ ./db
db > insert 1 cstack foo@bar.com
Executed.
db > insert 2 bob bob@example.com
Executed.
db > select
(1, cstack, foo@bar.com)
(2, bob, bob@example.com)
Executed.
db > insert foo bar 1
Syntax error. Could not parse statement.
db > .exit
~
我们先把这一个简单的用户表,硬编码到代码里面
/* Capacity, in bytes, of the fixed-size text columns of a row. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255

/* One row of the hard-coded user table: a numeric id plus two text fields. */
typedef struct {
    uint32_t id;
    char username[COLUMN_USERNAME_SIZE];
    char email[COLUMN_EMAIL_SIZE];
} Row;

/*
 * Print a row to stdout as "(id, username, email)" followed by a newline.
 *
 * row: row to print; username and email are printed with %s, so they must
 *      be NUL-terminated.
 */
void print_row(Row* row) {
    /* id is unsigned: print it with an unsigned conversion so values above
     * INT_MAX are not shown as negative numbers. The cast to unsigned long
     * keeps the format portable without pulling in <inttypes.h>. */
    printf("(%lu, %s, %s)\n", (unsigned long)row->id, row->username,
           row->email);
}
然后我们去看主函数包括三个switch部分
第一部分:如果是以.开头的命令,那么解析这个命令,目前这个功能只有.exit
第二部分:解析statement命令,目前只有INSERT和SELECT
/* Kinds of statement that can be prepared and executed. */
typedef enum {
    STATEMENT_INSERT,
    STATEMENT_SELECT
} StatementType;
第三部分:处理statement命令,目前只有INSERT和SELECT
int main(int argc, char* argv[]) {InputBuffer* input_buffer = new_input_buffer();Table* table = new_table();while (true) {print_prompt();read_input(input_buffer);if (input_buffer->buffer[0] == '.') {switch (do_meta_command(input_buffer)) {case (META_COMMAND_SUCCESS):continue;case (META_COMMAND_UNRECOGNIZED_COMMAND):printf("Unrecognized command '%s'\n", input_buffer->buffer);continue;}}Statement statement;switch (prepare_statement(input_buffer, &statement)) {case (PREPARE_SUCCESS):break;case (PREPARE_SYNTAX_ERROR):printf("Syntax error. Could not parse statement.\n");continue;case (PREPARE_UNRECOGNIZED_STATEMENT):printf("Unrecognized keyword at start of '%s'.\n",input_buffer->buffer);continue;}switch (execute_statement(&statement, table)) {case (EXECUTE_SUCCESS):printf("Executed.\n");break;case (EXECUTE_TABLE_FULL):printf("Error: Table full.\n");break;}}
}
下一步,就搞清楚数据是怎么在内存中存储的。
主要是Table结构
/*
 * In-memory table: a row count plus an array of page pointers.
 * A pages[] entry that is NULL is treated by row_slot() as not yet allocated.
 */
typedef struct {
    uint32_t num_rows;             /* number of rows currently stored */
    void *pages[TABLE_MAX_PAGES];  /* one pointer per page */
} Table;
Table由若干页组成
这些很多行的数据是分成若干页存储的,一页的大小为4K
/*
 * Page and table geometry.
 *
 * These are macros rather than file-scope `const uint32_t` objects: in ISO C
 * a const-qualified object is not a constant expression, so initialising one
 * file-scope object from another (e.g. PAGE_SIZE / ROW_SIZE) is only accepted
 * as a compiler extension. The casts keep the uint32_t type the constants
 * had before.
 */

/* Size of one page in bytes (4 KiB). */
#define PAGE_SIZE ((uint32_t)4096)

/* Maximum number of pages a table can hold. */
#define TABLE_MAX_PAGES 100

/* Whole rows that fit in one page; ROW_SIZE is defined elsewhere. */
#define ROWS_PER_PAGE ((uint32_t)(PAGE_SIZE / ROW_SIZE))

/* Maximum number of rows a table can hold. */
#define TABLE_MAX_ROWS ((uint32_t)(ROWS_PER_PAGE * TABLE_MAX_PAGES))
每一页能存储多少行数据,等于每一页的大小除以每一行的大小
总的最大值就是页数乘行数
我们先看表的顺序扫描
/*
 * Sequentially scan the table and print every stored row.
 *
 * For each row index below table->num_rows, the row's bytes are located with
 * row_slot(), unpacked into a local Row with deserialize_row() and printed
 * with print_row(). The statement argument is not used.
 *
 * Always returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
    Row current;
    uint32_t index = 0;

    while (index < table->num_rows) {
        void *source = row_slot(table, index);
        deserialize_row(source, &current);
        print_row(&current);
        index++;
    }

    return EXECUTE_SUCCESS;
}
这里有一步反序列化;在反序列化之前,需要先通过row_slot找到该行在内存中的位置
/*
 * Return the address where row number row_num is stored inside the table.
 *
 * The row's page is row_num / ROWS_PER_PAGE; within that page the row starts
 * at (row_num % ROWS_PER_PAGE) * ROW_SIZE bytes. A page is allocated the
 * first time a row on it is accessed.
 *
 * Returns NULL if row_num lies beyond the last page the table can hold, or
 * if the page could not be allocated.
 */
void* row_slot(Table* table, uint32_t row_num) {
    /* Find the page that holds this row. */
    uint32_t page_num = row_num / ROWS_PER_PAGE;
    if (page_num >= TABLE_MAX_PAGES) {
        /* Would index past the end of table->pages. */
        return NULL;
    }

    void* page = table->pages[page_num];
    if (page == NULL) {
        /* Allocate memory only when we try to access the page. */
        page = malloc(PAGE_SIZE);
        if (page == NULL) {
            return NULL;
        }
        table->pages[page_num] = page;
    }

    uint32_t row_offset = row_num % ROWS_PER_PAGE;
    uint32_t byte_offset = row_offset * ROW_SIZE;

    /* Arithmetic on void* is a GNU extension; step in bytes via char*. */
    return (char*)page + byte_offset;
}
然后去看插入操作
/*
 * Append the row carried by the statement to the end of the table.
 *
 * While the table holds fewer than TABLE_MAX_ROWS rows, the row is serialized
 * into the slot for index table->num_rows, the row count is incremented and
 * EXECUTE_SUCCESS is returned. Otherwise nothing is written and
 * EXECUTE_TABLE_FULL is returned.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
    if (table->num_rows < TABLE_MAX_ROWS) {
        void *destination = row_slot(table, table->num_rows);
        serialize_row(&statement->row_to_insert, destination);
        table->num_rows++;
        return EXECUTE_SUCCESS;
    }

    return EXECUTE_TABLE_FULL;
}