mysql (storage engine)

created : Thu, 28 May 2020 07:48:47 +0900
modified : Sat, 26 Mar 2022 03:44:02 +0900
mysql storage engine database

Reference pages

  1. https://dev.mysql.com/doc/internals/en/custom-engine.html
  2. mysql architecture
     1. https://dev.mysql.com/doc/dev/mysql-server/latest/PAGE_PFS_PSI.html

Environment setup

$ git clone https://github.com/mysql/mysql-server

Problems encountered

Things to know when coding

About mysys.h

  1. include/mysys.h collects many useful utility functions (a combined sketch follows this list).
  2. File handling: my_create, my_close
  3. Strings/filenames: fn_format (filename format) - argument order is to (buffer variable), name, dir, extension, flags
  4. Memory: my_malloc, my_free
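
A rough sketch (not from the actual tree) of how the functions above fit together; the helper name and the ".foo" extension are made up, and my_malloc is shown with the current signature that takes a PSI key:

/* Hedged sketch: build a path with fn_format (to-buffer, name, dir,
   extension, flags), create and close it with my_create/my_close, and
   pair my_malloc with my_free. */
static int foo_touch_file(const char *name)
{
  char name_buff[FN_REFLEN];
  File fd= my_create(fn_format(name_buff, name, "", ".foo",
                               MY_REPLACE_EXT | MY_UNPACK_FILENAME),
                     0, O_RDWR | O_TRUNC, MYF(MY_WME));
  if (fd < 0)
    return -1;
  my_close(fd, MYF(0));

  uchar *scratch= (uchar *) my_malloc(PSI_NOT_INSTRUMENTED, 1024, MYF(MY_WME));
  if (!scratch)
    return -1;
  my_free(scratch);
  return 0;
}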

About THD (the mysql thread object)

  1. Setting a variable on the mysql thread object (THD) with THDVAR_SET appears to copy the value (my guess) -> so if you have a value to store, my_malloc a buffer, set it, then my_free your own copy. (Be careful not to leak memory!)
  2. Reading and writing values on the MySQL thread object (THD): THDVAR(target thread, variable name), THDVAR_SET(target thread, variable name, address of the value) - see the sketch below.
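
A rough sketch of this pattern for a made-up "foo" engine; MYSQL_THDVAR_STR / THDVAR / THDVAR_SET are the real plugin macros, but the names, flags, and the copy-on-set assumption are illustrative:

/* Hypothetical per-connection string variable. */
static MYSQL_THDVAR_STR(foo_last_msg, PLUGIN_VAR_MEMALLOC,
                        "Last message recorded for this connection",
                        NULL, NULL, "");

static void foo_set_last_msg(THD *thd, const char *msg)
{
  char *tmp= (char *) my_malloc(PSI_NOT_INSTRUMENTED,
                                strlen(msg) + 1, MYF(MY_WME));
  if (!tmp)
    return;
  my_stpcpy(tmp, msg);
  THDVAR_SET(thd, foo_last_msg, tmp);  /* assumed to copy the value ... */
  my_free(tmp);                        /* ... so free our temporary right away */
}

/* Reading it back: const char *cur= THDVAR(thd, foo_last_msg); */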

Debug

  1. For debugging, prefer DBUG_RETURN over a plain return wherever possible (example below).
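
What that looks like in practice (the function name is hypothetical); the key point is that DBUG_RETURN / DBUG_VOID_RETURN must pair with a DBUG_ENTER in the same function so the dbug call trace stays balanced:

int foo_do_something(int x)
{
  DBUG_ENTER("foo_do_something");  /* pushes this frame onto the dbug stack */
  if (x < 0)
    DBUG_RETURN(-1);               /* pops the frame and logs the return value */
  DBUG_RETURN(0);                  /* a plain `return` here would unbalance the trace */
}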

Notes from reading the reference pages

23.2 Overview

23.3 Creating Storage Engine Source Files

23.4 Adding Engine Specific Variables and Parameters

23.5 Creating the handlerton

typedef struct
  {
    const char *name; /* name of the storage engine: used in CREATE TABLE ... ENGINE=FOO; */
    SHOW_COMP_OPTION state; /* value printed by the SHOW STORAGE ENGINES command */
    const char *comment; /* description printed by the SHOW STORAGE ENGINES command */
    enum db_type db_type; /* the docs say a custom engine must use DB_TYPE_UNKNOWN */
    bool (*init)(); /* called exactly once at server startup; put anything that must happen before any handler is instantiated here */
    uint slot; /* memory area each storage engine owns, reachable via thd->ha_data[foo_hton.slot]; the docs say to consult it when implementing rollback */
    uint savepoint_offset; /* location of the savepoint data; 0 means no savepoint memory is needed */
    int  (*close_connection)(THD *thd);
    int  (*savepoint_set)(THD *thd, void *sv);
    int  (*savepoint_rollback)(THD *thd, void *sv);
    int  (*savepoint_release)(THD *thd, void *sv);
    int  (*commit)(THD *thd, bool all);
    int  (*rollback)(THD *thd, bool all);
    int  (*prepare)(THD *thd, bool all);
    int  (*recover)(XID *xid_list, uint len);
    int  (*commit_by_xid)(XID *xid);
    int  (*rollback_by_xid)(XID *xid);
    void *(*create_cursor_read_view)();
    void (*set_cursor_read_view)(void *);
    void (*close_cursor_read_view)(void *);
    handler *(*create)(TABLE *table);
    void (*drop_database)(char* path);
    int (*panic)(enum ha_panic_function flag);
    int (*release_temporary_latches)(THD *thd);
    int (*update_statistics)();
    int (*start_consistent_snapshot)(THD *thd);
    bool (*flush_logs)();
    bool (*show_status)(THD *thd, stat_print_fn *print, enum ha_stat_type stat);
    int (*repl_report_sent_binlog)(THD *thd, char *log_file_name, my_off_t end_offset);
    uint32 flags;
  } handlerton;
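
For reference, a handlerton of this (older) shape is initialized statically, roughly in the style of the chapter's EXAMPLE engine; foo_create_handler and the strings below are placeholders, and most callbacks are simply left NULL:

handlerton foo_hton= {
  "FOO",                     /* name used in CREATE TABLE ... ENGINE=FOO */
  SHOW_OPTION_YES,           /* state shown by SHOW STORAGE ENGINES */
  "FOO storage engine",      /* comment shown by SHOW STORAGE ENGINES */
  DB_TYPE_UNKNOWN,           /* custom engines use DB_TYPE_UNKNOWN */
  NULL,                      /* init: one-time setup before any handler exists */
  0,                         /* slot: filled in by the server */
  0,                         /* savepoint_offset: no savepoint memory needed */
  NULL,                      /* close_connection */
  NULL, NULL, NULL,          /* savepoint_set / rollback / release */
  NULL, NULL, NULL,          /* commit, rollback, prepare */
  NULL, NULL, NULL,          /* recover, commit_by_xid, rollback_by_xid */
  NULL, NULL, NULL,          /* cursor read view create / set / close */
  foo_create_handler,        /* create: hands back a new handler instance */
  NULL,                      /* drop_database */
  NULL, NULL, NULL, NULL,    /* panic, release_temporary_latches,
                                update_statistics, start_consistent_snapshot */
  NULL, NULL, NULL,          /* flush_logs, show_status, repl_report_sent_binlog */
  HTON_CAN_RECREATE          /* flags */
};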

23.6 Handling Handler Instantiation

static handler* example_create_handler (TABLE* table);
static handler *myisam_create_handler(TABLE *table)
  {
    return new ha_myisam(table);
  }
ha_federated::ha_federated(TABLE *table_arg)
  :handler(&federated_hton, table_arg),
    mysql(0), stored_result(0), scan_flag(0),
    ref_length(sizeof(MYSQL_ROW_OFFSET)), current_position(0)
    {}

23.7 Defining Filename Extensions

static const char *ha_tina_exts[] = {
  ".CSV",
  NullS
};
const char **ha_tina::bas_ext() const
{
  return ha_tina_exts;
}

23.8 Creating Tables

virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0;
typedef struct st_ha_create_information
{
    CHARSET_INFO *table_charset, *default_table_charset;
    LEX_STRING connect_string;
    const char *comment,*password;
    const char *data_file_name, *index_file_name;
    const char *alias;
    ulonglong max_rows,min_rows;
    ulonglong auto_increment_value;
    ulong table_options;
    ulong avg_row_length;
    ulong raid_chunksize;
    ulong used_fields;
    SQL_LIST merge_list;
    enum db_type db_type;
    enum row_type row_type;
    uint null_bits;                       /* NULL bits at start of record */
    uint options;                         /* OR of HA_CREATE_ options */
    uint raid_type,raid_chunks;
    uint merge_insert_method;
    uint extra_size;                      /* length of extra data segment */
    bool table_existed;                /* 1 in create if table existed */
    bool frm_only;                        /* 1 if no ha_create_table() */
    bool varchar;                         /* 1 if table has a VARCHAR */
} HA_CREATE_INFO;
int ha_tina::create(const char *name, TABLE *table_arg,
  HA_CREATE_INFO *create_info)
{
    char name_buff[FN_REFLEN];
    File create_file;
    DBUG_ENTER("ha_tina::create");

    if ((create_file= my_create(fn_format(name_buff, name, "", ".CSV",
          MY_REPLACE_EXT|MY_UNPACK_FILENAME),0,
          O_RDWR | O_TRUNC,MYF(MY_WME))) < 0)
    DBUG_RETURN(-1);

    my_close(create_file,MYF(0));

    DBUG_RETURN(0);
}

23.9 Opening a Table

int open(const char *name, int mode, int test_if_locked);
#define HA_OPEN_ABORT_IF_LOCKED   0   /* default */
#define HA_OPEN_WAIT_IF_LOCKED    1
#define HA_OPEN_IGNORE_IF_LOCKED  2
#define HA_OPEN_TMP_TABLE         4   /* Table is a temp table */
#define HA_OPEN_DELAY_KEY_WRITE   8   /* Don't update index */
#define HA_OPEN_ABORT_IF_CRASHED  16
#define HA_OPEN_FOR_REPAIR        32  /* open even if crashed */
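
The notes stop at the signature and the lock flags; the chapter's ha_tina::open() is roughly the following (reproduced from memory, so treat it as a sketch): fetch or create the shared descriptor for the table, register with its THR_LOCK, and record the size used later by position():

int ha_tina::open(const char *name, int mode, int test_if_locked)
{
  DBUG_ENTER("ha_tina::open");

  if (!(share= get_share(name, table)))   /* per-table shared state */
    DBUG_RETURN(1);
  thr_lock_data_init(&share->lock, &lock, NULL);
  ref_length= sizeof(off_t);              /* size of a position() reference */

  DBUG_RETURN(0);
}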

Impressions from reading the code

23.10 Implementing Basic Table Scanning

23.10.1 Implementing the store_lock() Method

enum thr_lock_type
{
 TL_IGNORE=-1,
 TL_UNLOCK,                  /* UNLOCK ANY LOCK */
 TL_READ,                    /* Read lock */
 TL_READ_WITH_SHARED_LOCKS,
 TL_READ_HIGH_PRIORITY,      /* High prior. than TL_WRITE. Allow concurrent insert */
 TL_READ_NO_INSERT,          /* READ, Don't allow concurrent insert */
 TL_WRITE_ALLOW_WRITE,       /* Write lock, but allow other threads to read / write. */
 TL_WRITE_ALLOW_READ,        /* Write lock, but allow other threads to read. */
 TL_WRITE_CONCURRENT_INSERT, /* WRITE lock used by concurrent insert. */
 TL_WRITE_DELAYED,           /* Write used by INSERT DELAYED.  Allows READ locks */
 TL_WRITE_LOW_PRIORITY,      /* WRITE lock that has lower priority than TL_READ */
 TL_WRITE,                   /* Normal WRITE lock */
 TL_WRITE_ONLY               /* Abort new lock request with an error */
};
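
The store_lock() body itself isn't copied above; for a simple engine it is essentially the ha_tina version (sketched from memory): record the requested lock type if nothing is held yet, and hand the handler's THR_LOCK_DATA back to the server:

THR_LOCK_DATA **ha_tina::store_lock(THD *thd,
                                    THR_LOCK_DATA **to,
                                    enum thr_lock_type lock_type)
{
  if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK)
    lock.type= lock_type;   /* remember what the server asked for */
  *to++= &lock;             /* give our lock data to the lock manager */
  return to;
}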

23.10.2 Implementing the external_lock() Method

23.10.3 Implementing the rnd_init() Method

int ha_tina::rnd_init(bool scan)
{
      DBUG_ENTER("ha_tina::rnd_init");

      current_position= next_position= 0;
      records= 0;
      chain_ptr= chain;

      DBUG_RETURN(0);
}

23.10.4 Implementing the info(uint flag) Method

ulonglong data_file_length;      /* Length of data file */
ulonglong max_data_file_length;  /* Max length of data file */
ulonglong index_file_length;
ulonglong max_index_file_length;
ulonglong delete_length;         /* Free bytes */
ulonglong auto_increment_value;
ha_rows records;                 /* Records in table */
ha_rows deleted;                 /* Deleted records */
ulong raid_chunksize;
ulong mean_rec_length;           /* physical reclength */
time_t create_time;              /* When table was created */
time_t check_time;
time_t update_time;
int ha_tina::info(uint flag)
 {
   DBUG_ENTER("ha_tina::info");
   /* This is a lie, but you don't want the optimizer to see zero or 1 */
   if (!records_is_known && stats.records < 2)
     stats.records= 2;
   DBUG_RETURN(0);
 }

23.10.5 Implementing the extra() method

23.10.6 Implementing the rnd_next() method

int ha_tina::rnd_next(byte *buf)
{
  DBUG_ENTER("ha_tina::rnd_next");

  statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status);

  current_position= next_position;
  if (!share->mapped_file)
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  if (HA_ERR_END_OF_FILE == find_current_row(buf) )
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  records++;
  DBUG_RETURN(0);
}
int ha_tina::find_current_row(byte *buf)
{
  byte *mapped_ptr= (byte *)share->mapped_file + current_position;
  byte *end_ptr;
  DBUG_ENTER("ha_tina::find_current_row");

  /* EOF should be counted as new line */
  if ((end_ptr=  find_eoln(share->mapped_file, current_position,
                           share->file_stat.st_size)) == 0)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  for (Field **field=table->field ; *field ; field++)
  {
    buffer.length(0);
    mapped_ptr++; // Increment past the first quote
    for(;mapped_ptr != end_ptr; mapped_ptr++)
    {
      // Need to convert line feeds!
      if (*mapped_ptr == '"' &&
          (((mapped_ptr[1] == ',') && (mapped_ptr[2] == '"')) ||
           (mapped_ptr == end_ptr -1 )))
      {
        mapped_ptr += 2; // Move past the , and the "
        break;
      }
      if (*mapped_ptr == '\\' && mapped_ptr != (end_ptr - 1))
      {
        mapped_ptr++;
        if (*mapped_ptr == 'r')
          buffer.append('\r');
        else if (*mapped_ptr == 'n' )
          buffer.append('\n');
        else if ((*mapped_ptr == '\\') || (*mapped_ptr == '"'))
          buffer.append(*mapped_ptr);
      else  /* This could only happen with an externally created file */
        {
          buffer.append('\\');
          buffer.append(*mapped_ptr);
        }
      }
      else
        buffer.append(*mapped_ptr);
    }
    (*field)->store(buffer.ptr(), buffer.length(), system_charset_info);
  }
  next_position= (end_ptr - share->mapped_file)+1;
  /* Maybe use \N for null? */
  memset(buf, 0, table->s->null_bytes); /* We do not implement nulls! */

  DBUG_RETURN(0);
}

23.11 Closing a Table

23.12 Adding Support for INSERT to a Storage Engine

int ha_foo::write_row(byte *buf)
int ha_myisam::write_row(byte * buf)
{
  statistic_increment(table->in_use->status_var.ha_write_count,&LOCK_status);

  /* If we have a timestamp column, update it to the current time */
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();

  /*
    If we have an auto_increment column and we are writing a changed row
    or a new row, then update the auto_increment value in the record.
  */
  if (table->next_number_field && buf == table->record[0])
    update_auto_increment();
  return mi_write(file,buf);
}

23.13 Adding Support for UPDATE to a Storage Engine

int ha_foo::update_row(const byte *old_data, byte *new_data)
int ha_tina::update_row(const byte * old_data, byte * new_data)
{
  int size;
  DBUG_ENTER("ha_tina::update_row");

  statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
                     &LOCK_status);

  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
    table->timestamp_field->set_time();

  size= encode_quote(new_data);

  if (chain_append())
    DBUG_RETURN(-1);

  if (my_write(share->data_file, buffer.ptr(), size, MYF(MY_WME | MY_NABP)))
    DBUG_RETURN(-1);
  DBUG_RETURN(0);
}

23.14 Adding Support for DELETE to a Storage Engine

int ha_tina::delete_row(const byte * buf)
{
  DBUG_ENTER("ha_tina::delete_row");
  statistic_increment(table->in_use->status_var.ha_delete_count,
                      &LOCK_status);

  if (chain_append())
    DBUG_RETURN(-1);

  --records;

  DBUG_RETURN(0);
}

23.15 Supporting Non-Sequential Reads

23.15.1 Implementing the position() Method

void ha_foo::position(const byte *record)
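
For a tina-style engine, position() just stashes the current row offset into the handler's ref buffer (a sketch from memory of the same chapter; my_store_ptr packs the offset into ref_length bytes):

void ha_tina::position(const byte *record)
{
  DBUG_ENTER("ha_tina::position");
  my_store_ptr(ref, ref_length, current_position);  /* remember where this row starts */
  DBUG_VOID_RETURN;
}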

23.15.2 Implementing the rnd_pos() Method

int ha_foo::rnd_pos(byte * buf, byte *pos)
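
And rnd_pos() does the inverse: unpack the saved offset and re-read that row (again a sketch from memory, paired with the position() sketch above):

int ha_tina::rnd_pos(byte * buf, byte *pos)
{
  DBUG_ENTER("ha_tina::rnd_pos");
  statistic_increment(table->in_use->status_var.ha_read_rnd_count,
                      &LOCK_status);
  current_position= my_get_ptr(pos, ref_length);  /* offset saved by position() */
  DBUG_RETURN(find_current_row(buf));
}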

Aside

23.16 Supporting Indexing

23.16.1 Indexing Overview

ha_foo::index_init
ha_foo::index_read
ha_foo::index_read_idx
ha_foo::rnd_next
ha_foo::update_row

23.16.2 Getting Index Information During CREATE TABLE Operations

#define HA_NOSAME             1  /* Set if not duplicated records   */
#define HA_PACK_KEY           2  /* Pack string key to previous key */
#define HA_AUTO_KEY           16
#define HA_BINARY_PACK_KEY    32 /* Packing of all keys to prev key */
#define HA_FULLTEXT          128 /* For full-text search            */
#define HA_UNIQUE_CHECK      256 /* Check the key for uniqueness    */
#define HA_SPATIAL          1024 /* For spatial search              */
#define HA_NULL_ARE_EQUAL   2048 /* NULL in key are cmp as equal    */
#define HA_GENERATED_KEY    8192 /* Automatically generated key     */
enum ha_key_alg {
 HA_KEY_ALG_UNDEF=     0,  /* Not specified (old file)     */
 HA_KEY_ALG_BTREE=     1,  /* B-tree, default one          */
 HA_KEY_ALG_RTREE=     2,  /* R-tree, for spatial searches */
 HA_KEY_ALG_HASH=      3,  /* HASH keys (HEAP tables)      */
 HA_KEY_ALG_FULLTEXT=  4   /* FULLTEXT (MyISAM tables)     */
};
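
During create() the engine can walk table_arg->key_info to see these flags and the key algorithm; a hedged sketch of that loop (the unsupported-feature check and the error code choice are illustrative):

/* Inside ha_foo::create(): inspect each key requested by CREATE TABLE. */
for (uint i= 0; i < table_arg->s->keys; i++)
{
  KEY *key= table_arg->key_info + i;
  if (key->algorithm == HA_KEY_ALG_FULLTEXT || (key->flags & HA_SPATIAL))
    return HA_ERR_UNSUPPORTED;            /* this sketch engine only does plain keys */
  bool unique= (key->flags & HA_NOSAME);  /* UNIQUE / PRIMARY KEY */
  /* ... remember `unique` and the key parts in the engine's own metadata ... */
}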

23.16.3 Creating Index Keys

23.16.4 Parsing Key Information

23.16.5 Providing Index Information to the Optimizer

23.16.5.1 Implementing the info() Method

23.16.5.2 Implementing the records_in_range() Method

ha_rows ha_foo::records_in_range(uint inx, key_range *min_key, key_range *max_key)
typedef struct st_key_range
{
 const byte *key;
 uint length;
 key_part_map keypart_map;
 enum ha_rkey_function flag;
} key_range;
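
records_in_range() only has to give the optimizer an estimate of how many rows fall between min_key and max_key; a constant is legal when the engine keeps no statistics. A minimal sketch:

ha_rows ha_foo::records_in_range(uint inx, key_range *min_key, key_range *max_key)
{
  /* No real statistics: claim a small, non-zero number of matching rows.
     A production engine would consult its per-index statistics instead. */
  return 10;
}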

23.16.6 Preparing for Index Use with index_init()

int ha_foo::index_init(uint keynr, bool sorted)
int handler::index_init(uint idx) { active_index=idx; return 0; }

23.16.7 Cleaning up with index_end()

23.16.8 Implementing the index_read() Method

int ha_foo::index_read(byte * buf, const byte * key,
                       ulonglong keypart_map,
                       enum ha_rkey_function find_flag)
HA_READ_AFTER_KEY
HA_READ_BEFORE_KEY
HA_READ_KEY_EXACT
HA_READ_KEY_OR_NEXT
HA_READ_KEY_OR_PREV
HA_READ_PREFIX
HA_READ_PREFIX_LAST
HA_READ_PREFIX_LAST_OR_PREV

23.16.9 Implementing the index_read_idx() Method

int ha_foo::index_read_idx(byte * buf, uint keynr, const byte * key,
                          ulonglong keypart_map,
                          enum ha_rkey_function find_flag)

23.16.10 Implementing the index_read_last() Method

SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC;

23.16.11 Implementing the index_next() Method

 int ha_foo::index_next(byte * buf)

23.16.12 Implementing the index_prev() Method

23.16.13 Implementing the index_first() Method

 int ha_foo::index_first(byte * buf)

23.16.14 Implementing the index_last() Method

 int ha_foo::index_last(byte * buf)

23.17 Supporting Transactions

Pausing here for now.