Glibc exit() 源码分析

bxz

2023-04-02

Exit

Glibc2.23 exit() source code analysis

测试程序

#include <stdio.h>
#include <stdlib.h>
/* Minimal driver for the walkthrough: emit one line through stdio and then
   leave via exit() so that the exit-handler path is exercised.  */
int main(void)
{
    puts("grxer");
    exit(0);
}

用户层面

stdlib/exit.c中

/* Normal process termination.  Forwards STATUS together with the address
   of the global handler list __exit_funcs to __run_exit_handlers; the
   third argument (true) asks it to run the __libc_atexit hooks as well.  */
void
exit (int status)
{
  __run_exit_handlers (status, &__exit_funcs, true);
}
libc_hidden_def (exit)

__exit_funcs是什么?

/* One node of the exit-handler list; each node holds up to 32 handlers.  */
struct exit_function_list
  {
    /* Next node (singly linked list).  */
    struct exit_function_list *next;
    /* Number of entries of fns already in use, i.e. the index of the next
       free slot in fns.  */
    size_t idx;
    /* Array of handler records.  */
    struct exit_function fns[32];
  };
/* One registered exit handler: a type tag plus the matching callback data.  */
struct exit_function
  {
    /* `flavour' should be of type of the `enum' above but since we need
       this element in an atomic operation we have to use `long int'.  */
    long int flavor;
    /*
    flavor identifies the kind of handler stored in this slot.  It holds
    one of the enum values {ef_free, ef_us, ef_on, ef_at, ef_cxa}:
       - ef_free: the slot is unused
       - ef_us:   the slot is in use but the handler kind is not known yet
       - ef_on, ef_at, ef_cxa: the three handler kinds; they differ mainly
         in the arguments the callback receives
       at:  a function taking no arguments and returning nothing.
       on:  a function taking (int status, void *arg); arg is stored next
            to the function pointer.
       cxa: a function taking (void *arg, int status) -- two parameters,
            not three.  arg and dso_handle are separate fields stored next
            to the function pointer; dso_handle is not passed to fn.
    */
    /* Callback storage; which member is valid is selected by flavor.  */
    union
      {
    void (*at) (void);
    struct
      {
        void (*fn) (int status, void *arg);
        void *arg;
      } on;
    struct
      {
        void (*fn) (void *arg, int status);
        void *arg;
        void *dso_handle;
      } cxa;
      } func;
  };
/* First list node.  It is a static object (it lives in a writable section
   of libc), not heap memory.  */
static struct exit_function_list initial;
/* Head of the exit-handler list; starts out pointing at `initial'.  */
struct exit_function_list *__exit_funcs = &initial;

__run_exit_handlers

/* Run the TLS destructors, then every handler recorded in the list *LISTP,
   then (when RUN_LIST_ATEXIT is true) the __libc_atexit hooks, and finally
   call _exit (STATUS).

   STATUS is handed to ef_on and ef_cxa handlers and to _exit.
   LISTP points at the list head; the head is advanced as nodes are
   consumed.  Within a node, entries are taken from the highest used index
   downwards.  */
void
attribute_hidden
__run_exit_handlers (int status, struct exit_function_list **listp,
             bool run_list_atexit)
{
  /* First, call the TLS destructors.  */
#ifndef SHARED
  if (&__call_tls_dtors != NULL)
#endif
    __call_tls_dtors ();

  /* We do it this way to handle recursive calls to exit () made by
     the functions registered with `atexit' and `on_exit'. We call
     everyone on the list and use the status value in the last
     exit (). */
  while (*listp != NULL)
    {
      struct exit_function_list *cur = *listp;

      while (cur->idx > 0)
    {
      /* idx is decremented before the handler is invoked, so the entry
         is already removed from the node when its callback runs.  */
      const struct exit_function *const f =
        &cur->fns[--cur->idx];
      switch (f->flavor)
        {
          void (*atfct) (void);
          void (*onfct) (int status, void *arg);
          void (*cxafct) (void *arg, int status);

        /* Nothing to call for unused or not-yet-typed slots.  */
        case ef_free:
        case ef_us:
          break;
        /* For each handler kind: copy the stored pointer, apply
           PTR_DEMANGLE to it when that macro is defined, then call it
           with the arguments that kind expects.  */
        case ef_on:
          onfct = f->func.on.fn;
#ifdef PTR_DEMANGLE
          PTR_DEMANGLE (onfct);
#endif
          onfct (status, f->func.on.arg);
          break;
        case ef_at:
          atfct = f->func.at;
#ifdef PTR_DEMANGLE
          PTR_DEMANGLE (atfct);
#endif
          atfct ();
          break;
        case ef_cxa:
          cxafct = f->func.cxa.fn;
#ifdef PTR_DEMANGLE
          PTR_DEMANGLE (cxafct);
#endif
          cxafct (f->func.cxa.arg, status);
          break;
        }
    }

      /* Node exhausted: move the head to the next node.  */
      *listp = cur->next;
      if (*listp != NULL)
    /* Don't free the last element in the chain, this is the statically
       allocate element.  */
    free (cur);
    }

  if (run_list_atexit)
    RUN_HOOK (__libc_atexit, ());

  _exit (status);
}

__call_tls_dtors()

stdlib/cxa_thread_atexit_impl.c

/* Call the destructors.  This is called either when a thread returns from the
   initial function or when the process exits via the exit function.

   Walks tls_dtor_list until it is empty.  For each entry: the stored
   function pointer is passed through PTR_DEMANGLE (when defined), the
   entry is unlinked from the list head, the function is called with the
   entry's obj, and the entry is freed.  */
void
__call_tls_dtors (void)
{
  while (tls_dtor_list)
    {
      struct dtor_list *cur = tls_dtor_list;
      dtor_func func = cur->func;
#ifdef PTR_DEMANGLE
      PTR_DEMANGLE (func);
#endif

      /* Advance the list head before invoking the destructor.  */
      tls_dtor_list = tls_dtor_list->next;
      func (cur->obj);

      /* Ensure that the MAP dereference happens before
         l_tls_dtor_count decrement.  That way, we protect this access from a
         potential DSO unload in _dl_close_worker, which happens when
         l_tls_dtor_count is 0.  See CONCURRENCY NOTES for more detail.  */
      atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
      free (cur);
    }
}
libc_hidden_def (__call_tls_dtors)

注释写的很清楚，当线程从调用初始函数返回时或进程通过exit函数退出时调用，释放线程局部储存

tls_dtor_list 是多线程程序中的线程本地存储析构函数列表。tls_dtor_list 维护了所有需要在线程退出时被调用的析构函数的指针列表。这个列表会在程序运行时动态地进行修改和更新。

我们的示例程序并没有注册任何TLS析构函数，tls_dtor_list为空，所以__call_tls_dtors直接return了

while循环

没什么好说的，不断遍历去根据类型，利用函数指针去调用__exit_funcs里的析构函数，释放掉结构体内存

不过这里涉及到一个解密操作

这个函数地址非常怪，不是正常值

会利用PTR_DEMANGLE这个解密

/* Decode a mangled pointer in place: rotate VAR right by 2*LP_SIZE+1 bits,
   then XOR it with the pointer_guard field of tcbhead_t, which is read
   through the %fs segment at offsetof (tcbhead_t, pointer_guard).  */
#  define PTR_DEMANGLE(var)    asm ("ror $2*" LP_SIZE "+1, %0\n"	      \
                     "xor %%fs:%c2, %0"			      \
                     : "=r" (var)			      \
                     : "0" (var),			      \
                       "i" (offsetof (tcbhead_t,	      \
                              pointer_guard)))

循环右移和异或解密：先循环右移 2*LP_SIZE+1 位（x86-64 下 LP_SIZE 为 8，即 0x11 位），再与 key 异或。这个 key 是 tcbhead_t 0x30 偏移处的 pointer_guard，紧挨着之前 tls hijack 时操作的 0x28 处的 canary（stack_guard）

ror后

xor后

TODO

_dl_fini函数源码分析

TODO

__exit_funcs是怎么初始化的

RUN_HOOK (__libc_atexit, ())

/* NOTE(review): presumably this places _IO_cleanup into the __libc_atexit
   set that RUN_HOOK (__libc_atexit, ()) runs; text_set_element is defined
   outside this excerpt -- confirm.  */
text_set_element(__libc_atexit, _IO_cleanup);

/* Flush every stream without locking, then switch all streams to
   unbuffered mode.  Returns the result of _IO_flush_all_lockp (0).  */
int
_IO_cleanup (void)
{
  /* We do *not* want locking.  Some threads might use streams but
     that is their problem, we flush them underneath them.  */
  int result = _IO_flush_all_lockp (0);

  /* We currently don't have a reliable mechanism for making sure that
     C++ static destructors are executed in the correct order.
     So it is possible that other static destructors might want to
     write to cout - and they're supposed to be able to do so.

     The following will make the standard streambufs be unbuffered,
     which forces any output from late destructors to be written out. */
  _IO_unbuffer_all ();

  return result;
}

_IO_flush_all_lockp(0)

/* Walk the _IO_list_all chain and push out pending output of every stream
   by calling _IO_OVERFLOW (fp, EOF).

   DO_LOCK: when nonzero, list_all_lock (if _IO_MTSAFE_IO is defined) and
   each stream's own lock are taken around the work.
   Returns 0, or EOF if _IO_OVERFLOW returned EOF for at least one stream.

   run_fp, _IO_list_all_stamp and list_all_lock are defined outside this
   excerpt.  */
int _IO_flush_all_lockp (int do_lock)
{
  int result = 0;
  struct _IO_FILE *fp;
  int last_stamp;

  /* With _IO_MTSAFE_IO: open a cleanup region and, if requested, take
     list_all_lock.  */
#ifdef _IO_MTSAFE_IO
  __libc_cleanup_region_start (do_lock, flush_cleanup, NULL);
  if (do_lock)
    _IO_lock_lock (list_all_lock);
#endif

  /* Remember the current list stamp and start at the head of the list.  */
  last_stamp = _IO_list_all_stamp;
  fp = (_IO_FILE *) _IO_list_all;
  while (fp != NULL)
    {
      /* Record the stream being processed; lock it if requested.  */
      run_fp = fp;
      if (do_lock)
        _IO_flockfile (fp);

      /* Flush when there is pending output: either the stream is not
         wide-oriented (_mode <= 0) and its write pointer is past the write
         base, or it is wide-oriented (_mode > 0) and the same holds for
         its wide-data buffer.  */
      if (((fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base)
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
           || (_IO_vtable_offset (fp) == 0
               && fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr
                                    > fp->_wide_data->_IO_write_base))
#endif
           )
          && _IO_OVERFLOW (fp, EOF) == EOF)
        result = EOF;

      /* Unlock the stream if it was locked, and clear run_fp.  */
      if (do_lock)
        _IO_funlockfile (fp);
      run_fp = NULL;

      /* If the stamp changed while this stream was being handled, restart
         from the head of the list (presumably the list was modified --
         the stamp is maintained outside this excerpt).  */
      if (last_stamp != _IO_list_all_stamp)
        {
          fp = (_IO_FILE *) _IO_list_all;
          last_stamp = _IO_list_all_stamp;
        }
      else
        fp = fp->_chain; /* Otherwise continue with the next stream.  */
    }

  /* With _IO_MTSAFE_IO: release list_all_lock if taken and close the
     cleanup region.  */
#ifdef _IO_MTSAFE_IO
  if (do_lock)
    _IO_lock_unlock (list_all_lock);
  __libc_cleanup_region_end (0);
#endif

  return result;
}

将所有打开的文件指针的缓冲区中的数据输出，并清空缓冲区。函数的返回值为 0 或 EOF，表示输出是否成功。函数参数 do_lock 表示是否需要对文件指针加锁操作。函数主要采用了循环遍历 _IO_list_all 链表的方式来处理所有打开的文件指针。其中，如果定义了 _IO_MTSAFE_IO 宏，则使用了线程安全的加锁和解锁操作。

函数里调用_IO_OVERFLOW

#define _IO_OVERFLOW(FP, CH) JUMP1 (__overflow, FP, CH)

调用虚表中的__overflow，即_IO_new_file_overflow函数，第一参数_IO_FILE文件流结构体，第二个参数 EOF(-1)

fwrite分析过，会来到

if (ch == EOF)
  return _IO_do_write (f, f->_IO_write_base,
     f->_IO_write_ptr - f->_IO_write_base);

调用了vtable中__write对应的_IO_new_file_write

执行系统调用write把数据写入文件

_IO_unbuffer_all

/* Switch every stream on the _IO_list_all chain to unbuffered mode and set
   its _mode to -1.

   Only streams that are not already flagged _IO_UNBUFFERED and whose _mode
   is nonzero get their buffer replaced via _IO_SETBUF (fp, NULL, 0); the
   final _mode = -1 assignment is applied to every stream.

   dealloc_buffers and freeres_list are defined outside this excerpt.  */
static void
_IO_unbuffer_all (void)
{
  struct _IO_FILE *fp;
  for (fp = (_IO_FILE *) _IO_list_all; fp; fp = fp->_chain)
    {
      if (! (fp->_flags & _IO_UNBUFFERED)
      /* Iff stream is un-orientated, it wasn't used. */
      && fp->_mode != 0)
    {
#ifdef _IO_MTSAFE_IO
      /* Try up to MAXTRIES times to take the stream lock, yielding between
         attempts.  If the lock is still not obtained, cnt == MAXTRIES and
         the code below proceeds without holding it.  */
      int cnt;
#define MAXTRIES 2
      for (cnt = 0; cnt < MAXTRIES; ++cnt)
        if (fp->_lock == NULL || _IO_lock_trylock (*fp->_lock) == 0)
          break;
        else
          /* Give the other thread time to finish up its use of the
             stream.  */
          __sched_yield ();
#endif

      /* When buffers are not to be deallocated here and the buffer is not
         a user buffer: flag it _IO_USER_BUF, push the stream onto
         freeres_list and remember the old buffer in _freeres_buf.
         NOTE(review): the _IO_USER_BUF flag presumably keeps the setbuf
         call below from freeing the buffer (cf. the check in _IO_setb).  */
      if (! dealloc_buffers && !(fp->_flags & _IO_USER_BUF))
        {
          fp->_flags |= _IO_USER_BUF;

          fp->_freeres_list = freeres_list;
          freeres_list = fp;
          fp->_freeres_buf = fp->_IO_buf_base;
        }

      _IO_SETBUF (fp, NULL, 0);

      /* Wide-oriented stream: drop the wide buffer too.  */
      if (fp->_mode > 0)
        _IO_wsetb (fp, NULL, NULL, 0);

#ifdef _IO_MTSAFE_IO
      /* Unlock only if the trylock loop above actually took the lock.  */
      if (cnt < MAXTRIES && fp->_lock != NULL)
        _IO_lock_unlock (*fp->_lock);
#endif
    }

      /* Make sure that never again the wide char functions can be
         used.  */
      fp->_mode = -1;
    }
}

_IO_SETBUF (fp, NULL, 0)

#define _IO_SETBUF(FP, BUFFER, LENGTH) JUMP2 (__setbuf, FP, BUFFER, LENGTH)

! (fp->_flags & _IO_UNBUFFERED) && fp->_mode != 0

循环调用每个文件流的==vtable里__setbuf对应的_IO_new_file_setbuf==

# define _IO_new_file_setbuf _IO_file_setbuf
/* setbuf implementation for file streams.  Delegates to
   _IO_default_setbuf; if that returns NULL, returns NULL.  Otherwise
   points all three write pointers and (via _IO_setg) all three get-area
   pointers at the start of the newly installed buffer and returns FP.  */
_IO_FILE *
_IO_new_file_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len)
{
  if (_IO_default_setbuf (fp, p, len) == NULL)
    return NULL;

  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
    = fp->_IO_buf_base;
  _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);

  return fp;
}
libc_hidden_ver (_IO_new_file_setbuf, _IO_file_setbuf)

/* Generic setbuf.  First calls _IO_SYNC (fp) and returns NULL if that
   yields EOF.  With P == NULL or LEN == 0 the stream is flagged
   _IO_UNBUFFERED and given its one-byte _shortbuf as buffer; otherwise the
   flag is cleared and [P, P+LEN) becomes the buffer.  In both cases
   _IO_setb is called with a == 0, and all read and write pointers are
   then reset to 0.  Returns FP on success.  */
_IO_FILE *
_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len)
{
    if (_IO_SYNC (fp) == EOF)
    return NULL;
    if (p == NULL || len == 0)
      {
    fp->_flags |= _IO_UNBUFFERED;
    _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
      }
    else
      {
    fp->_flags &= ~_IO_UNBUFFERED;
    _IO_setb (fp, p, p+len, 0);
      }
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
    return fp;
}

#define _IO_SYNC(FP) JUMP0 (__sync, FP)

==文件流 vtable 中 __sync 对应的是 _IO_new_file_sync（不是 _IO_default_setbuf）；下面贴出的仍是调用 _IO_SYNC 的 _IO_default_setbuf==

/* Generic setbuf (this is the caller of _IO_SYNC, not the __sync slot
   itself).  Calls _IO_SYNC (fp) and returns NULL if that yields EOF.  With
   P == NULL or LEN == 0 the stream is flagged _IO_UNBUFFERED and given its
   one-byte _shortbuf as buffer; otherwise the flag is cleared and
   [P, P+LEN) becomes the buffer.  All read and write pointers are then
   reset to 0.  Returns FP on success.  */
_IO_FILE *
_IO_default_setbuf (_IO_FILE *fp, char *p, _IO_ssize_t len)
{
    if (_IO_SYNC (fp) == EOF)
    return NULL;
    if (p == NULL || len == 0)
      {
    fp->_flags |= _IO_UNBUFFERED;
    _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
      }
    else
      {
    fp->_flags &= ~_IO_UNBUFFERED;
    _IO_setb (fp, p, p+len, 0);
      }
    fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
    fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;
    return fp;
}

/* Install [B, EB) as the buffer of stream F.

   The previous buffer is freed first, unless there is none or the stream
   is flagged _IO_USER_BUF.  A nonzero A clears _IO_USER_BUF for the new
   buffer (so a later _IO_setb will free it); A == 0 sets _IO_USER_BUF (so
   it will not be freed here).  */
void
_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
{
  if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
    free (f->_IO_buf_base);
  f->_IO_buf_base = b;
  f->_IO_buf_end = eb;
  if (a)
    f->_flags &= ~_IO_USER_BUF;
  else
    f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)

取消所有缓冲区，包括标准I/O流和用户打开的文件流，这样之后的输出都会立即写出而不会留在缓冲区中。它遍历所有的_IO_FILE对象，对尚未设置_IO_UNBUFFERED且已经被使用过（_mode != 0）的流调用_IO_SETBUF (fp, NULL, 0)取消缓冲，并设置_IO_UNBUFFERED标志以指示它们不再使用缓冲。它还将每个_IO_FILE对象的_mode设置为-1，以确保不再使用宽字符函数。在多线程环境下，代码会尝试获取流的锁来保证线程安全（最多尝试MAXTRIES次，拿不到锁也会继续执行）。

内核层面

_exit()

最后调用_exit系统调用销毁进程

_exit会立刻中断当前进程
关闭所有属于该进程的文件
将该进程的所有子进程移交给init进程
给该进程的父进程发送SIGCHLD信号
_exit的参数status会被返回给父进程，可以被父进程的wait函数接收。

总结

用户层面：先调用TLS析构函数，再逆序调用__exit_funcs中注册的退出处理函数，最后通过_IO_cleanup刷新libc中所有流的缓冲区（例如退出前把stdout缓冲区里的数据写出去）并把这些流设为无缓冲

内核层面：释放掉这个进程打开的文件描述符, 释放掉task结构体, 所有资源都被释放完毕后, 内核会从调度队列中取出这个任务，然后向父进程发送一个信号, 表示有一个子进程终止，此时这个进程才算是真正结束

进程终止=释放所有占有的资源+cpu不再给它分配时间片