Fuzz.AFL-All-in-One

2024-03-04 15:47:22 # Fuzz #Fuzz #Coding

overview

对于AFL代码的重新阅读，笔者之前曾经阅读过一次AFL代码，但是比较粗糙，所以决定重新阅读一遍，理解其中比较细节的部分。

首先简单介绍一下AFL，AFL是一个覆盖率引导的fuzz 工具。

它将一个无跳转的顺序执行流程看成一个基本块，并通过一个bitmap记录运行时每一个输入对应的标准块。

通过基本块的覆盖率引导对于输入种子的变异，从而不断变换输入，进行测试，来挖掘漏洞。

afl-gcc/afl-as | 代码插桩

afl的核心思想是覆盖率引导，为了能够得到运行时代码覆盖率，AFL需要在编译时对产生的汇编代码进行插桩，在每个基本块前插入的桩代码能够写入对应bitmap，记录运行时当前覆盖率

这是通过对gcc和as的封装实现的，也即通过封装编译器以及汇编器，来实现编译时插桩。

afl-gcc 的核心逻辑很简单

只是对于gcc，做了一些参数处理的封装，让gcc启用一些配合fuzz的编译参数，并且使用封装好的afl-as作为汇编器

int main(int argc, char **argv)
{

  if (isatty(2) && !getenv("AFL_QUIET"))
  {

    SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
  }
  else
    be_quiet = 1;

  if (argc < 2)
  {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
         "for gcc or clang, letting you recompile third-party code with the required\n"
         "runtime instrumentation. A common use pattern would be one of the following:\n\n"

         "  CC=%s/afl-gcc ./configure\n"
         "  CXX=%s/afl-g++ ./configure\n\n"

         "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
         "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
         BIN_PATH, BIN_PATH);

    exit(1);
  }

  find_as(argv[0]);
  // 首先找到封装的afl-as
  edit_params(argc, argv);
  // 然后对于参数进行处理传给真正的编译器
  execvp(cc_params[0], (char **)cc_params);
  // 然后运行编译器
  FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);

  return 0;
}

然后是afl-as，他是as的封装，插桩就是在此完成的, 这里主要关注插装的实现

int main(int argc, char** argv) {

  s32 pid;
  u32 rand_seed;
  int status;
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;
  struct timezone tz;

  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
 
  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  gettimeofday(&tv, &tz);
  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
  srandom(rand_seed);
  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) 
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  if (!just_version) add_instrumentation();

  if (!(pid = fork())) {

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  exit(WEXITSTATUS(status));

}

插装的核心在于调用的 add_instrumentation 函数

add_instrumentation

add_instrumentation 就是实际用来插桩的函数。

此函数首先打开input文件和output文件

static void add_instrumentation(void) {

  static u8 line[MAX_LINE];

  FILE* inf;
  FILE* outf;
  s32 outfd;
  u32 ins_lines = 0;

  u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
      skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8* colon_pos;

#endif /* __APPLE__ */

  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");

然后开始循环遍历input文件

1	while (fgets(line, MAX_LINE, inf)) {

在特定位置插桩代码，这里的几个flag的值将在后面解释，简单来说，就是用来找到需要插桩的基本块的开头。这几个flag就是用来识别当前是否是基本块的开头。如果是是，就需要进行插桩

if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
    instrument_next && line[0] == '\t' && isalpha(line[1])) {

  fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
          R(MAP_SIZE));

  instrument_next = 0;
  ins_lines++;

}

无论是否进行了插桩，最后都要将原行写入输出文件

1	fputs(line, outf);

由于一般只在text段插桩，所以要在找到此段，并且用 instr_ok 来标识

if (pass_thru) continue;

/* All right, this is where the actual fun begins. For one, we only want to
   instrument the .text section. So, let's keep track of that in processed
   files - and let's set instr_ok accordingly. */

if (line[0] == '\t' && line[1] == '.') {

  /* OpenBSD puts jump tables directly inline with the code, which is
     a bit annoying. They use a specific format of p2align directives
     around them, so we use that as a signal. */

  if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
      isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;

  if (!strncmp(line + 2, "text\n", 5) ||
      !strncmp(line + 2, "section\t.text", 13) ||
      !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
      !strncmp(line + 2, "section __TEXT,__text", 21)) {
    instr_ok = 1;
    continue; 
  }

  if (!strncmp(line + 2, "section\t", 8) ||
      !strncmp(line + 2, "section ", 8) ||
      !strncmp(line + 2, "bss\n", 4) ||
      !strncmp(line + 2, "data\n", 5)) {
    instr_ok = 0;
    continue;
  }

}

skip_csect 用来跳过无用段，比如64位程序中的.code32段

if (strstr(line, ".code")) {

  if (strstr(line, ".code32")) skip_csect = use_64bit;
  if (strstr(line, ".code64")) skip_csect = !use_64bit;

}

跳过intel 风格的汇编

1 2	if (strstr(line, ".intel_syntax")) skip_intel = 1; if (strstr(line, ".att_syntax")) skip_intel = 0;

跳过 ad-hoc __asm__ 字段

if (line[0] == '#' || line[1] == '#') {

  if (strstr(line, "#APP")) skip_app = 1;
  if (strstr(line, "#NO_APP")) skip_app = 0;

}

然后，对于条件跳转，可以直接区分出基本块，所以可以直接插桩

if (line[0] == '\t') {

  if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

    fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
            R(MAP_SIZE));
    ins_lines++;
  }

  continue;

}

识别跳转标签来插桩，并且针对不同平台进行处理

#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    if ((colon_pos = strstr(line, ":"))) {

      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    if (strstr(line, ":")) {

      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
            && R(100) < inst_ratio) {

#else

        /* Apple: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
            && R(100) < inst_ratio) {

#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;
    
      }

    }

  }

最后，如果进行了插桩，再插入main_payload

if (ins_lines)
  fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

if (input_file) fclose(inf);
fclose(outf);

if (!be_quiet) {

  if (!ins_lines) WARNF("No instrumentation targets found%s.",
                        pass_thru ? " (pass-thru mode)" : "");
  else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
           ins_lines, use_64bit ? "64" : "32",
           getenv("AFL_HARDEN") ? "hardened" : 
           (sanitizer ? "ASAN/MSAN" : "non-hardened"),
           inst_ratio);
 
}

所以，综合来看，其实最终就是插入了两个部分：

在每个基本块前面插入了trampoline_fmt
在整体后面插入了main_payload

trampoline_fmt_64

trampoline直译为蹦床代码，一般是在两种运行环境之间桥接的代码，比如不同语言写的代码之间的参数转换以及环境保存和恢复。

afl在插桩时，会根据架构的不同，插入两种不同的 trampoline_fmt 代码，此代码用来在每个基本块运行前，写入对应的全局的bitmap，用来标识当前进程运行时经过此基本块方便后面计算覆盖率以及发现新路径

这里的trampoline_fmt_64就是64位下的相应代码

static const u8* trampoline_fmt_64 =

  "\n"
  "/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
  "\n"
  ".align 4\n"
  "\n"
  "leaq -(128+24)(%%rsp), %%rsp\n"
  "movq %%rdx,  0(%%rsp)\n"
  "movq %%rcx,  8(%%rsp)\n"
  "movq %%rax, 16(%%rsp)\n"
  "movq $0x%08x, %%rcx\n"
  "call __afl_maybe_log\n"
  "movq 16(%%rsp), %%rax\n"
  "movq  8(%%rsp), %%rcx\n"
  "movq  0(%%rsp), %%rdx\n"
  "leaq (128+24)(%%rsp), %%rsp\n"
  "\n"
  "/* --- END --- */\n"
  "\n";

这是一段跳转代码，前面用于保存参数，核心在于

1 2	"movq $0x%08x, %%rcx\n" "call __afl_maybe_log\n"

在上文中可以看到，这里的rcx的值是fprintf 格式化的一个随机值，用来标识代码块，笔者其实有点疑惑为什么不用一个从0开始的值，然后一个个加上去，避免重复。

1 2	fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE));

核心调用的__afl_maybe_log 在 main_payload 中实现

main_payload_64

main_payload_64包含 trampoline_fmt 运行时所需要的一些函数以及全局变量，其中最核心的是 __afl_maybe_log 函数

这里整个流程大致如下(图源ScUpax0s师傅)

简单解释一下就是，在第一此运行时， __afl_area_ptr==NULL 和 __afl_alobal_area==NULL 均为true，说明此时是第一次运行到__afl_maybe_log ，此时会进入下面的分支，在完成初始化后，进程阻塞从管道中读取，直到收到afl-fuzz进程发送过来的启动命令，此时会fork一个子进程，此子进程并恢复寄存器，然后继续运行。

在之后，子进程再次进入桩代码就会直接进入 __afl_store，也就是写入当前基本块对应的bitmap用来标识运行状态

第一次运行

初始化

首先是检查共享内存是否初始化，也就是检查 __afl_area_ptr 是否为NULL

"  seto  %al\n"
"\n"
"  /* Check if SHM region is already mapped. */\n"
"\n"
"  movq  __afl_area_ptr(%rip), %rdx\n"
"  testq %rdx, %rdx\n"
"  je    __afl_setup\n"
"\n"

如果没有，初始化共享内存并将指针保存至 __afl_area_ptr 和 __afl_global_area

这里的共享内存的id 通过环境变量来传递，通过getenv获取 AFL_SHM_ENV ，然后map出共享内存，此共享内存用来保存运行时的bitmap，需要注意的是，虽然名为bitmap，但实际上此时，这里的bitmap仍然是用一byte而不是一bit来标识相应区域是否运行到的，因为此时可以通过byte位记录运行次数，并且对于每一位的访问也要比真正的bitmap快一些。

真正的bitmap要在之后，在afl-fuzz中，通过共享内存获得运行结果后，将此处对应的内存压缩成为真正的bitmap

  "__afl_setup:\n"
  "\n"
  "  /* Do not retry setup if we had previous failures. */\n"
  "\n"
  "  cmpb $0, __afl_setup_failure(%rip)\n"
  "  jne __afl_return\n"
  "\n"
  "  /* Check out if we have a global pointer on file. */\n"
  "\n"
#ifndef __APPLE__
  "  movq  __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
  "  movq  (%rdx), %rdx\n"
#else
  "  movq  __afl_global_area_ptr(%rip), %rdx\n"
#endif /* !^__APPLE__ */
  "  testq %rdx, %rdx\n"
  "  je    __afl_setup_first\n"
  "\n"
  "  movq %rdx, __afl_area_ptr(%rip)\n"
  "  jmp  __afl_store\n" 
  "\n"
  "__afl_setup_first:\n"
  "\n"
  "  /* Save everything that is not yet saved and that may be touched by\n"
  "     getenv() and several other libcalls we'll be relying on. */\n"
  "\n"
  "  leaq -352(%rsp), %rsp\n"
  "\n"
  "  movq %rax,   0(%rsp)\n"
  "  movq %rcx,   8(%rsp)\n"
  "  movq %rdi,  16(%rsp)\n"
  "  movq %rsi,  32(%rsp)\n"
  "  movq %r8,   40(%rsp)\n"
  "  movq %r9,   48(%rsp)\n"
  "  movq %r10,  56(%rsp)\n"
  "  movq %r11,  64(%rsp)\n"
  "\n"
  "  movq %xmm0,  96(%rsp)\n"
  "  movq %xmm1,  112(%rsp)\n"
  "  movq %xmm2,  128(%rsp)\n"
  "  movq %xmm3,  144(%rsp)\n"
  "  movq %xmm4,  160(%rsp)\n"
  "  movq %xmm5,  176(%rsp)\n"
  "  movq %xmm6,  192(%rsp)\n"
  "  movq %xmm7,  208(%rsp)\n"
  "  movq %xmm8,  224(%rsp)\n"
  "  movq %xmm9,  240(%rsp)\n"
  "  movq %xmm10, 256(%rsp)\n"
  "  movq %xmm11, 272(%rsp)\n"
  "  movq %xmm12, 288(%rsp)\n"
  "  movq %xmm13, 304(%rsp)\n"
  "  movq %xmm14, 320(%rsp)\n"
  "  movq %xmm15, 336(%rsp)\n"
  "\n"
  "  /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */\n"
  "\n"
  "  /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the\n"
  "     original stack ptr in the callee-saved r12. */\n"
  "\n"
  "  pushq %r12\n"
  "  movq  %rsp, %r12\n"
  "  subq  $16, %rsp\n"
  "  andq  $0xfffffffffffffff0, %rsp\n"
  "\n"
  "  leaq .AFL_SHM_ENV(%rip), %rdi\n"
  CALL_L64("getenv")
  "\n"
  "  testq %rax, %rax\n"
  "  je    __afl_setup_abort\n"
  "\n"
  "  movq  %rax, %rdi\n"
  CALL_L64("atoi")
  "\n"
  "  xorq %rdx, %rdx   /* shmat flags    */\n"
  "  xorq %rsi, %rsi   /* requested addr */\n"
  "  movq %rax, %rdi   /* SHM ID         */\n"
  CALL_L64("shmat")
  "\n"
  "  cmpq $-1, %rax\n"
  "  je   __afl_setup_abort\n"
  "\n"
  "  /* Store the address of the SHM region. */\n"
  "\n"
  "  movq %rax, %rdx\n"
  "  movq %rax, __afl_area_ptr(%rip)\n"
  "\n"
#ifdef __APPLE__
  "  movq %rax, __afl_global_area_ptr(%rip)\n"
#else
  "  movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx\n"
  "  movq %rax, (%rdx)\n"
#endif /* ^__APPLE__ */
  "  movq %rax, %rdx\n"

然后再通过管道通知主进程此进程已经准备好了

"__afl_forkserver:\n"
"\n"
"  /* Enter the fork server mode to avoid the overhead of execve() calls. We\n"
"     push rdx (area ptr) twice to keep stack alignment neat. */\n"
"\n"
"  pushq %rdx\n"
"  pushq %rdx\n"
"\n"
"  /* Phone home and tell the parent that we're OK. (Note that signals with\n"
"     no SA_RESTART will mess it up). If this fails, assume that the fd is\n"
"     closed because we were execve()d from an instrumented binary, or because\n"
"     the parent doesn't want to use the fork server. */\n"
"\n"
"  movq $4, %rdx               /* length    */\n"
"  leaq __afl_temp(%rip), %rsi /* data      */\n"
"  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi       /* file desc */\n"
CALL_L64("write")
"\n"
"  cmpq $4, %rax\n"
"  jne  __afl_fork_resume\n"
"\n"

`__afl_fork_wait_loop`

__afl_fork_wait_loop 的作用是阻塞当前进程，直到从管道收到主进程发来的运行命令，如果收到了信号，则fork一个子进程，并调用 __afl_fork_resume 继续运行，否则继续阻塞

这里的和afl-fuzz通信用的管道的fd是相互约定好的，我们直到，此代码会插桩在需要fuzz的程序中，afl-fuzz会通过fork启动此程序，而fork是会继承文件描述符的，因此只要双方约定好一个确定的较大的文件描述符，即可相互通信

"__afl_fork_wait_loop:\n"
"\n"
"  /* Wait for parent by reading from the pipe. Abort if read fails. */\n"
"\n"
"  movq $4, %rdx               /* length    */\n"
"  leaq __afl_temp(%rip), %rsi /* data      */\n"
"  movq $" STRINGIFY(FORKSRV_FD) ", %rdi             /* file desc */\n"
CALL_L64("read")
"  cmpq $4, %rax\n"
"  jne  __afl_die\n"
"\n"
"  /* Once woken up, create a clone of our process. This is an excellent use\n"
"     case for syscall(__NR_clone, 0, CLONE_PARENT), but glibc boneheadedly\n"
"     caches getpid() results and offers no way to update the value, breaking\n"
"     abort(), raise(), and a bunch of other things :-( */\n"
"\n"
CALL_L64("fork")
"  cmpq $0, %rax\n"
"  jl   __afl_die\n"
"  je   __afl_fork_resume\n"
"\n"
"  /* In parent process: write PID to pipe, then wait for child. */\n"
"\n"
"  movl %eax, __afl_fork_pid(%rip)\n"
"\n"
"  movq $4, %rdx                   /* length    */\n"
"  leaq __afl_fork_pid(%rip), %rsi /* data      */\n"
"  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi             /* file desc */\n"
CALL_L64("write")
"\n"
"  movq $0, %rdx                   /* no flags  */\n"
"  leaq __afl_temp(%rip), %rsi     /* status    */\n"
"  movq __afl_fork_pid(%rip), %rdi /* PID       */\n"
CALL_L64("waitpid")
"  cmpq $0, %rax\n"
"  jle  __afl_die\n"
"\n"
"  /* Relay wait status to pipe, then loop back. */\n"
"\n"
"  movq $4, %rdx               /* length    */\n"
"  leaq __afl_temp(%rip), %rsi /* data      */\n"
"  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi         /* file desc */\n"
CALL_L64("write")
"\n"
"  jmp  __afl_fork_wait_loop\n"

__afl_fork_resume 用于恢复运行

实际上就是恢复了在栈中的寄存器

"__afl_fork_resume:\n"
"\n"
"  /* In child process: close fds, resume execution. */\n"
"\n"
"  movq $" STRINGIFY(FORKSRV_FD) ", %rdi\n"
CALL_L64("close")
"\n"
"  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi\n"
CALL_L64("close")
"\n"
"  popq %rdx\n"
"  popq %rdx\n"
"\n"
"  movq %r12, %rsp\n"
"  popq %r12\n"
"\n"
"  movq  0(%rsp), %rax\n"
"  movq  8(%rsp), %rcx\n"
"  movq 16(%rsp), %rdi\n"
"  movq 32(%rsp), %rsi\n"
"  movq 40(%rsp), %r8\n"
"  movq 48(%rsp), %r9\n"
"  movq 56(%rsp), %r10\n"
"  movq 64(%rsp), %r11\n"
"\n"
"  movq  96(%rsp), %xmm0\n"
"  movq 112(%rsp), %xmm1\n"
"  movq 128(%rsp), %xmm2\n"
"  movq 144(%rsp), %xmm3\n"
"  movq 160(%rsp), %xmm4\n"
"  movq 176(%rsp), %xmm5\n"
"  movq 192(%rsp), %xmm6\n"
"  movq 208(%rsp), %xmm7\n"
"  movq 224(%rsp), %xmm8\n"
"  movq 240(%rsp), %xmm9\n"
"  movq 256(%rsp), %xmm10\n"
"  movq 272(%rsp), %xmm11\n"
"  movq 288(%rsp), %xmm12\n"
"  movq 304(%rsp), %xmm13\n"
"  movq 320(%rsp), %xmm14\n"
"  movq 336(%rsp), %xmm15\n"
"\n"
"  leaq 352(%rsp), %rsp\n"
"\n"
"  jmp  __afl_store\n"
"\n"
"__afl_die:\n"
"\n"
"  xorq %rax, %rax\n"
CALL_L64("_exit")

`__afl_store`

__afl_store 用来更新bitmap状态

  "__afl_store:\n"
  "\n"
  "  /* Calculate and store hit for the code location specified in rcx. */\n"
  "\n"
#ifndef COVERAGE_ONLY
  "  xorq __afl_prev_loc(%rip), %rcx\n"
  "  xorq %rcx, __afl_prev_loc(%rip)\n"
  "  shrq $1, __afl_prev_loc(%rip)\n"
#endif /* ^!COVERAGE_ONLY */
  "\n"
#ifdef SKIP_COUNTS
  "  orb  $1, (%rdx, %rcx, 1)\n"
#else
  "  incb (%rdx, %rcx, 1)\n"

此处笔者其实还没有完全理解，这里的rcx实际上是trampoline中送来的标识每一个基本块的随机id，此处的代码用随机id异或上一个运行的桩的随机id来作为当前块在bitmap里的offset，并将此offset处的计数加一，用来表示对应的基本块运行了一次，同时，将此id存入 __afl_prev_loc 使用，记录为上一次桩的随机id，并右移一位。

这里offset是怎么保证不重复的呢，笔者感觉应该是跟线性同余随机算法的特性有关，不过由于不是笔者的重点，所以笔者暂且不过多探究。

afl-fuzz | 一次fuzz的标准流程

全局变量

首先是bitmap相关

EXP_ST u8 *trace_bits; // 和子进程共享的bitmap，程序运行的结果就存在于此bitmap中

EXP_ST u8 virgin_bits[MAP_SIZE], // 标记仍然没有被触及到的区域
    virgin_tmout[MAP_SIZE],      // 标记还没有出现在tmout的区域
    virgin_crash[MAP_SIZE];      // 标记还没有出现在crash的区域

然后是testcase组成的队列，每一个testcase会在初始化时被初始化为queue中的一个实体

struct queue_entry
{

  u8 *fname; /* File name for the test case      */
  u32 len;   /* Input length                     */

  u8 cal_failed,    /* Calibration failed?              */
      trim_done,    /* Trimmed?                         */
      was_fuzzed,   /* Had any fuzzing done yet?        */
      passed_det,   /* Deterministic stages passed?     */
      has_new_cov,  /* Triggers new coverage?           */
      var_behavior, /* Variable behavior?               */
      favored,      /* Currently favored?               */
      fs_redundant; /* Marked as redundant in the fs?   */

  u32 bitmap_size, /* Number of bits set in bitmap     */
      exec_cksum;  /* Checksum of the execution trace  */

  u64 exec_us,  /* Execution time (us)              */
      handicap, /* Number of queue cycles behind    */
      depth;    /* Path depth                       */

  u8 *trace_mini; /* Trace bytes, if kept             */
  u32 tc_ref;     /* Trace bytes ref count            */

  struct queue_entry *next, /* Next element, if any             */
      *next_100;            /* 100 elements ahead               */
};

static struct queue_entry *queue, /* Fuzzing queue (linked list)      */
    *queue_cur,                   // 当前处理的testscase
    *queue_top,                   // testcase list的顶部
    *q_prev100;                   // 前100标记

然后是与queue相关的一些变量

EXP_ST u32 queued_paths,  // 
    queued_variable,     // 存在可变区域的testcase的数量
    queued_at_start,     // testcase 的初始数量
    queued_discovered,   // 运行时发现的数量
    queued_imported,     // 通过-S引入的数量
    queued_favored,      // favored_queue 的数量
    queued_with_cov,     // 存在新覆盖的queue的数量
    pending_not_fuzzed,  // 还没有被fuzz的数量
    pending_favored,     // 还没有被fuzz的favored_queue的数量
    cur_skipped_paths,   /* Abandoned inputs in cur cycle    */
    cur_depth,           /* Current path depth               */
    max_depth,           /* Max path depth                   */
    useless_at_start,    /* Number of useless starting paths */
    var_byte_count,      /* Bitmap bytes with var behavior   */
    current_entry,       /* Current queue entry ID           */
    havoc_div = 1;       /* Cycle count divisor for havoc    */

main

fuzz的开始是对于参数的解析

case 'i': /* input dir */

  if (in_dir) FATAL("Multiple -i options not supported");
  in_dir = optarg;

  if (!strcmp(in_dir, "-")) in_place_resume = 1;

  break;
  ....

在完成参数的解析后，开始设置对应的信号处理的handle, 这一部分将在之后进行分析

1	setup_signal_handlers();

以及检查ASAN参数

1	check_asan_opts();

然后开始对应环境变量的解析

if (sync_id) fix_up_sync();

if (!strcmp(in_dir, out_dir))
  FATAL("Input and output directories can't be the same");

if (dumb_mode) {

  if (crash_mode) FATAL("-C and -n are mutually exclusive");
  if (qemu_mode)  FATAL("-Q and -n are mutually exclusive");

}

if (getenv("AFL_NO_FORKSRV"))    no_forkserver    = 1;
if (getenv("AFL_NO_CPU_RED"))    no_cpu_meter_red = 1;
if (getenv("AFL_NO_ARITH"))      no_arith         = 1;
if (getenv("AFL_SHUFFLE_QUEUE")) shuffle_queue    = 1;
if (getenv("AFL_FAST_CAL"))      fast_cal         = 1;

if (getenv("AFL_HANG_TMOUT")) {
  hang_tmout = atoi(getenv("AFL_HANG_TMOUT"));
  if (!hang_tmout) FATAL("Invalid value of AFL_HANG_TMOUT");
}

if (dumb_mode == 2 && no_forkserver)
  FATAL("AFL_DUMB_FORKSRV and AFL_NO_FORKSRV are mutually exclusive");

if (getenv("AFL_PRELOAD")) {
  setenv("LD_PRELOAD", getenv("AFL_PRELOAD"), 1);
  setenv("DYLD_INSERT_LIBRARIES", getenv("AFL_PRELOAD"), 1);
}

if (getenv("AFL_LD_PRELOAD"))
  FATAL("Use AFL_PRELOAD instead of AFL_LD_PRELOAD");

对于此处涉及到的环境变量将在后面一一说明

接下来将原来的命令行参数保存起来

1	save_cmdline(argc, argv);

设置用户态banner

1	fix_up_banner(argv[optind]);

检查是否在tty模式下运行

1	check_if_tty();

获取核心数

1	get_core_count();

如果设置了 AFFINITY

1
2
3

#ifdef HAVE_AFFINITY
  bind_to_free_cpu();
#endif /* HAVE_AFFINITY */

然后是一些对于机器和架构的检查

1 2	check_crash_handling(); check_cpu_governor();

设置postprocessor

1	setup_post();

设置共享内存用于消息的传递

1	setup_shm();

对于class16数据进行分类

1	init_count_class16();

为输出文件设置dir以及fd

1	setup_dirs_fds();

读取初始测试用例

1 2	read_testcases(); load_auto();

对输入目录进行一些处理

1	pivot_inputs();

接下来又是一连串的处理

if (extras_dir) load_extras(extras_dir);
// 如果设置了extras，那么加载extras
// 类似于字典
if (!timeout_given) find_timeout();
// 如果设置了timeout，设置给定timeout
detect_file_args(argv + optind + 1);
// 检查运行参数，找到@@
if (!out_file) setup_stdio_file();
// 设置输出文件
check_binary(argv[optind]);
// 检查目标程序
start_time = get_cur_time();
// 设置起始时间
if (qemu_mode)
// 如果使用了qemu_mode
  use_argv = get_qemu_argv(argv[0], argv + optind, argc - optind);
else
  use_argv = argv + optind;

对于输入进行试运行，确保所有输入符合预期

接下来cull_queue，在之后继续分析

1	cull_queue();

show_init_stats();
// 输出初始状态
seek_to = find_start_position();
// 针对resume状态而言，快速回复到终止位置

write_stats_file(0, 0, 0);
// 将初始状态保存在stat文件中
save_auto();
// 保存自动生成的extras

接下来是针对stop的处理

if (stop_soon) goto stop_fuzzing;

if (!not_on_tty) {
  sleep(4);
  start_time += 4000;
  if (stop_soon) goto stop_fuzzing;
}

然后正式进入fuzz循环

在循环前还要cull_queue 一次

 while (1) {

   u8 skipped_fuzz;

   cull_queue();
// 再次进行cull_queue 操作
   if (!queue_cur) {
   
     queue_cycle++;
     current_entry     = 0;
     cur_skipped_paths = 0;
     queue_cur         = queue;

     while (seek_to) {
       current_entry++;
       seek_to--;
       queue_cur = queue_cur->next;
     }
  // 当存在seek_to时，直接跳到对应的testcase
     show_stats();

     if (not_on_tty) {
       ACTF("Entering queue cycle %llu.", queue_cycle);
       fflush(stdout);
     }

     /* If we had a full queue cycle with no new finds, try
        recombination strategies next. */

     if (queued_paths == prev_queued) {

       if (use_splicing) cycles_wo_finds++; else use_splicing = 1;

     } else cycles_wo_finds = 0;

     prev_queued = queued_paths;

     if (sync_id && queue_cycle == 1 && getenv("AFL_IMPORT_FIRST"))
       sync_fuzzers(use_argv);

   }

   skipped_fuzz = fuzz_one(use_argv);
// 运行一次fuzz，并完成种子的变异阶段
   if (!stop_soon && sync_id && !skipped_fuzz) {
     
     if (!(sync_interval_cnt++ % SYNC_INTERVAL))
       sync_fuzzers(use_argv);

   }

   if (!stop_soon && exit_1) stop_soon = 2;

   if (stop_soon) break;

   queue_cur = queue_cur->next;
   current_entry++;
   // 测试下一个种子

 }

setup_shm | 基于共享内存的消息传递

这里是和fuzz对象子进程对应的设置共享内存，用来传递bitmap

EXP_ST void setup_shm(void) {

  u8* shm_str;

  if (!in_bitmap) memset(virgin_bits, 255, MAP_SIZE);

  memset(virgin_tmout, 255, MAP_SIZE);
  memset(virgin_crash, 255, MAP_SIZE);

  shm_id = shmget(IPC_PRIVATE, MAP_SIZE, IPC_CREAT | IPC_EXCL | 0600);

  if (shm_id < 0) PFATAL("shmget() failed");

  atexit(remove_shm);

  shm_str = alloc_printf("%d", shm_id);

  /* If somebody is asking us to fuzz instrumented binaries in dumb mode,
     we don't want them to detect instrumentation, since we won't be sending
     fork server commands. This should be replaced with better auto-detection
     later on, perhaps? */

  if (!dumb_mode) setenv(SHM_ENV_VAR, shm_str, 1);

  ck_free(shm_str);

  trace_bits = shmat(shm_id, NULL, 0);
  
  if (trace_bits == (void *)-1) PFATAL("shmat() failed");

}

这里直接创建一个共享内存，同子进程forkserver中 __afl_global_area 指向的区域进行共享

对于testcase的预处理

主要包含三个函数：

read_testcase: 从文件中读取testcases
perform_dry_run: 对于testcase的试运行
cull_queue: 挑选更好的种子

read_testcases | 读取testcase

用来从文件中读取testcase

首先创建了一些局部变量

struct dirent **nl;
s32 nl_cnt;
u32 i;
u8* fn;

找到queue文件夹:

fn = alloc_printf("%s/queue", in_dir);
if (!access(fn, F_OK)) in_dir = fn; else ck_free(fn);

ACTF("Scanning '%s'...", in_dir);

通过scandir获取字母序的目录文件

1	nl_cnt = scandir(in_dir, &nl, NULL, alphasort);

如果设置了shullle_queue，并且queue数量大于1，那就打乱此nl数组

if (shuffle_queue && nl_cnt > 1) {

  ACTF("Shuffling queue...");
  shuffle_ptrs((void**)nl, nl_cnt);
}

shuffle的逻辑也很简单，就是进行n次的随机交换

static void shuffle_ptrs(void** ptrs, u32 cnt) {

  u32 i;

  for (i = 0; i < cnt - 2; i++) {

    u32 j = i + UR(cnt - i);
    void *s = ptrs[i];
    ptrs[i] = ptrs[j];
    ptrs[j] = s;

  }

}

接下来是一个大循环，用来对于testcase一个个进行初始化

 for (i = 0; i < nl_cnt; i++) {

   struct stat st;

   u8* fn = alloc_printf("%s/%s", in_dir, nl[i]->d_name);
   u8* dfn = alloc_printf("%s/.state/deterministic_done/%s", in_dir, nl[i]->d_name);
   // 首先找到两个对应文件
   u8  passed_det = 0;
  
   free(nl[i]); /* not tracked */
   // 然后释放文件nl对象内存 
   if (lstat(fn, &st) || access(fn, R_OK))
     PFATAL("Unable to access '%s'", fn);
// 判断是否可以存在相应文件
   /* This also takes care of . and .. */

   if (!S_ISREG(st.st_mode) || !st.st_size || strstr(fn, "/README.testcases")) {
     // 剔除 "."、".." 和 README.testcases 
     // 以及空文件 等无效文件
     ck_free(fn);
     ck_free(dfn);
     continue;

   }

   if (st.st_size > MAX_FILE) 
     FATAL("Test case '%s' is too big (%s, limit is %s)", fn,
           DMS(st.st_size), DMS(MAX_FILE));
   // 如果testcase太大
   /* Check for metadata that indicates that deterministic fuzzing
      is complete for this entry. We don't want to repeat deterministic
      fuzzing when resuming aborted scans, because it would be pointless
      and probably very time-consuming. */
      
// 通过是否存在deterministic_done文件，来判断是否是resuming 
// 如果是resume，则跳过deterministic fuzz 阶段
   if (!access(dfn, F_OK)) passed_det = 1;
   ck_free(dfn);
   // 将testcase添加进queue 
   add_to_queue(fn, st.st_size, passed_det);
   
 }

最后是收尾的一些处理：

free(nl); /* not tracked */

if (!queued_paths)
{

  SAYF("\n" cLRD "[-] " cRST
       "Looks like there are no valid test cases in the input directory! The fuzzer\n"
       "    needs one or more test case to start with - ideally, a small file under\n"
       "    1 kB or so. The cases must be stored as regular files directly in the\n"
       "    input directory.\n");

  FATAL("No usable test cases in '%s'", in_dir);
}

last_path_time = 0;
queued_at_start = queued_paths;

add_to_queue

通过add_to_queue 将testcase加入queue

static void add_to_queue(u8* fname, u32 len, u8 passed_det) {

  struct queue_entry* q = ck_alloc(sizeof(struct queue_entry));

  q->fname        = fname;
  q->len          = len;
  q->depth        = cur_depth + 1;
  q->passed_det   = passed_det;
  // 设置queue的各个成员信息
  if (q->depth > max_depth) max_depth = q->depth;

  if (queue_top) {

    queue_top->next = q;
    queue_top = q;

  } else q_prev100 = queue = queue_top = q;
  // 将此queue_entry加入队列
  // 将此queue_entry放入queue_top
  queued_paths++;
  // 增加queue路径
  pending_not_fuzzed++;
  // 增加等待fuzz 计数
  cycles_wo_finds = 0;
  
  /* Set next_100 pointer for every 100th element (index 0, 100, etc) to allow faster iteration. */
  if ((queued_paths - 1) % 100 == 0 && queued_paths > 1) {
  // q_prev100 是一个相对于普通queue间隔100的queue，
  // 用来快速访问
    q_prev100->next_100 = q;
    q_prev100 = q;

  }

  last_path_time = get_cur_time();

}

其中最核心的部分就是设置了两个queue

perform_dry_run | 对于testcase的试运行

此函数会遍历queue中的每个个体，然后对于对应的testcase进行一下试运行，并根据运行结果先筛选出不合适的testcase。

首先创建了一些局部变量

1
2
3

struct queue_entry *q = queue;
u32 cal_failures = 0;
u8 *skip_crashes = getenv("AFL_SKIP_CRASHES");

然后进入了一个循环

循环的开始是通过queue的文件名读取了输入用例

while (q)
{

  u8 *use_mem;
  u8 res;
  s32 fd;

  u8 *fn = strrchr(q->fname, '/') + 1;

  ACTF("Attempting dry run with '%s'...", fn);
  
  fd = open(q->fname, O_RDONLY);
  if (fd < 0)
    PFATAL("Unable to open '%s'", q->fname);
 
  use_mem = ck_alloc_nozero(q->len);

  if (read(fd, use_mem, q->len) != q->len)
    FATAL("Short read from '%s'", q->fname);

  close(fd);

然后通过 calibrate_case 对testcase进行了处理并尝试运行

1 2	res = calibrate_case(argv, q, use_mem, 0, 1); ck_free(use_mem);

根据运行结果进行相应错误处理

    if (res == crash_mode || res == FAULT_NOBITS)
      SAYF(cGRA "    len = %u, map size = %u, exec speed = %llu us\n" cRST,
           q->len, q->bitmap_size, q->exec_us);

    switch (res)
    {

    case FAULT_NONE:
      // 如果没有错误，并且是queue的第一个testcase
      if (q == queue)
        check_map_coverage();

      if (crash_mode)
        FATAL("Test case '%s' does *NOT* crash", fn);
 
      break;

    case FAULT_TMOUT:

      if (timeout_given)
      {

        /* The -t nn+ syntax in the command line sets timeout_given to '2' and
           instructs afl-fuzz to tolerate but skip queue entries that time
           out. */

        if (timeout_given > 1)
        {
          WARNF("Test case results in a timeout (skipping)");
          q->cal_failed = CAL_CHANCES;
          cal_failures++;
          break;
        }

        SAYF("\n" cLRD "[-] " cRST
             "The program took more than %u ms to process one of the initial test cases.\n"
             "    Usually, the right thing to do is to relax the -t option - or to delete it\n"
             "    altogether and allow the fuzzer to auto-calibrate. That said, if you know\n"
             "    what you are doing and want to simply skip the unruly test cases, append\n"
             "    '+' at the end of the value passed to -t ('-t %u+').\n",
             exec_tmout,
             exec_tmout);

        FATAL("Test case '%s' results in a timeout", fn);
      }
      else
      {

        SAYF("\n" cLRD "[-] " cRST
             "The program took more than %u ms to process one of the initial test cases.\n"
             "    This is bad news; raising the limit with the -t option is possible, but\n"
             "    will probably make the fuzzing process extremely slow.\n\n"

             "    If this test case is just a fluke, the other option is to just avoid it\n"
             "    altogether, and find one that is less of a CPU hog.\n",
             exec_tmout);

        FATAL("Test case '%s' results in a timeout", fn);
      }

    case FAULT_CRASH:

      if (crash_mode)
        break;

      if (skip_crashes)
      {
        WARNF("Test case results in a crash (skipping)");
        q->cal_failed = CAL_CHANCES;
        cal_failures++;
        break;
      }

      if (mem_limit)
      {

        SAYF("\n" cLRD "[-] " cRST
             "Oops, the program crashed with one of the test cases provided. There are\n"
             "    several possible explanations:\n\n"

             "    - The test case causes known crashes under normal working conditions. If\n"
             "      so, please remove it. The fuzzer should be seeded with interesting\n"
             "      inputs - but not ones that cause an outright crash.\n\n"

             "    - The current memory limit (%s) is too low for this program, causing\n"
             "      it to die due to OOM when parsing valid files. To fix this, try\n"
             "      bumping it up with the -m setting in the command line. If in doubt,\n"
             "      try something along the lines of:\n\n"

#ifdef RLIMIT_AS
             "      ( ulimit -Sv $[%llu << 10]; /path/to/binary [...] <testcase )\n\n"
#else
             "      ( ulimit -Sd $[%llu << 10]; /path/to/binary [...] <testcase )\n\n"
#endif /* ^RLIMIT_AS */

             "      Tip: you can use http://jwilk.net/software/recidivm to quickly\n"
             "      estimate the required amount of virtual memory for the binary. Also,\n"
             "      if you are using ASAN, see %s/notes_for_asan.txt.\n\n"

#ifdef __APPLE__

             "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
             "      break afl-fuzz performance optimizations when running platform-specific\n"
             "      binaries. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

             "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
             "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
             DMS(mem_limit << 20), mem_limit - 1, doc_path);
      }
      else
      {

        SAYF("\n" cLRD "[-] " cRST
             "Oops, the program crashed with one of the test cases provided. There are\n"
             "    several possible explanations:\n\n"

             "    - The test case causes known crashes under normal working conditions. If\n"
             "      so, please remove it. The fuzzer should be seeded with interesting\n"
             "      inputs - but not ones that cause an outright crash.\n\n"

#ifdef __APPLE__

             "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
             "      break afl-fuzz performance optimizations when running platform-specific\n"
             "      binaries. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

             "    - Least likely, there is a horrible bug in the fuzzer. If other options\n"
             "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
      }

      FATAL("Test case '%s' results in a crash", fn);

    case FAULT_ERROR:

      FATAL("Unable to execute target application ('%s')", argv[0]);

    case FAULT_NOINST:

      FATAL("No instrumentation detected");

    case FAULT_NOBITS:

      useless_at_start++;

      if (!in_bitmap && !shuffle_queue)
        WARNF("No new instrumentation output, test case may be useless.");

      break;
    }

结束了循环

最后进行了错误处理

if (cal_failures)
{

  if (cal_failures == queued_paths)
    FATAL("All test cases time out%s, giving up!",
          skip_crashes ? " or crash" : "");

  WARNF("Skipped %u test cases (%0.02f%%) due to timeouts%s.", cal_failures,
        ((double)cal_failures) * 100 / queued_paths,
        skip_crashes ? " or crashes" : "");

  if (cal_failures * 5 > queued_paths)
    WARNF(cLRD "High percentage of rejected test cases, check settings!");
}

OKF("All test cases processed.");

calibrate_case

用来运行一次fuzz目标程序，并且记录fuzz运行的结果，用来校准对应的testcase

首先创建了一些局部变量

static u8 calibrate_case(char **argv, struct queue_entry *q, u8 *use_mem,
                         u32 handicap, u8 from_queue)
{

  static u8 first_trace[MAP_SIZE];

  u8 fault = 0, new_bits = 0, var_detected = 0, hnb = 0,
     first_run = (q->exec_cksum == 0);

  u64 start_us, stop_us;

  s32 old_sc = stage_cur, old_sm = stage_max;
  u32 use_tmout = exec_tmout;
  u8 *old_sn = stage_name;

然后判断testcase时是否来自queue，或者是否是resume一个fuzz job，也即，是否是一个已经运行过一次的fuzz，然后再接着运行，如果是，则更新use_tmout

if (!from_queue || resuming_fuzz)
  use_tmout = MAX(exec_tmout + CAL_TMOUT_ADD,
                  exec_tmout * CAL_TMOUT_PERC / 100);
                  // 提升tmout的值

更新cal_failed 的值

1	q->cal_failed++;

stage_name = "calibration";
// 更新stage_name 
stage_max = fast_cal ? 3 : CAL_CYCLES;
// 设置cal的最大论数，如果需要fast_cal则设置最大3论

确保没有forkserver并且非dump_mode时，创建一个forkserver

1 2	if (dumb_mode != 1 && !no_forkserver && !forksrv_pid) init_forkserver(argv);

什么是dump_mode呢？

dump_mode即没有插桩和确定性(deterministic)变异阶段的模式

q->exec_cksum 初始时为0，因此此处是用来判断是否是初次运行

virgin_bits是一个bitmap，用来记录没有触及的block

if (q->exec_cksum)
{
  // 如果不是初次运行
  memcpy(first_trace, trace_bits, MAP_SIZE);
  
  hnb = has_new_bits(virgin_bits);
  if (hnb > new_bits)
    new_bits = hnb;
    // 更新new_bits数
}

设置起始时间

1	start_us = get_cur_time_us();

接下来进入循环轮次:

这里的可变分支，指的是，对于同样的输入，可能可以到达也可能不能到达的分支。

在循环中，通过 run_target 真正让目标程序开始运行，

 for (stage_cur = 0; stage_cur < stage_max; stage_cur++)
 {
// 循环轮次由前面的代码确定
   u32 cksum;

   if (!first_run && !(stage_cur % stats_update_freq))
     show_stats();
   // 如果不是第一次运行 并且state_cur 隔 stats_update_freq 次
   // 则show_stats 
   write_to_testcase(use_mem, q->len);
   // 将testcase写入out_file 
   fault = run_target(argv, use_tmout);
   // 运行目标程序
   /* stop_soon is set by the handler for Ctrl+C. When it's pressed,
      we want to bail out quickly. */

   if (stop_soon || fault != crash_mode)
     goto abort_calibration;
   // 如果收到终止signal 
   if (!dumb_mode && !stage_cur && !count_bytes(trace_bits))
   {
     fault = FAULT_NOINST;
     goto abort_calibration;
   }

   cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
   // 计算bitmap的hash 

   if (q->exec_cksum != cksum)
   {
   // bitmap发生变化
   // 一般在第一次运行，或者在同样的参数下，分支可变的情形
     hnb = has_new_bits(virgin_bits);
     // 计算virgin_bits的更新
     if (hnb > new_bits)
       new_bits = hnb;
   
     if (q->exec_cksum)
     {
     // 如果是分支可变的情形
       u32 i;

       for (i = 0; i < MAP_SIZE; i++)
       {
	// 循环遍历，找到可变的region，如果找到了，就延长轮次
	// 以便进行更多的遍历
         if (!var_bytes[i] && first_trace[i] != trace_bits[i])
         {
           var_bytes[i] = 1;
           stage_max = CAL_CYCLES_LONG;
         }
       }
       var_detected = 1;
       // 检测到可变
     }
     else
     {
     // 如果是第一次运行
       q->exec_cksum = cksum;
       memcpy(first_trace, trace_bits, MAP_SIZE);
     }
   }
 }

增加总运行时间和轮次计算

stop_us = get_cur_time_us();

total_cal_us += stop_us - start_us;
total_cal_cycles += stage_max;

更新queue的相关成员

q->exec_us = (stop_us - start_us) / stage_max;
// 平均每轮的执行时间
q->bitmap_size = count_bytes(trace_bits);
q->handicap = handicap;
q->cal_failed = 0;
// 将之前的设置的1还原为0，表示没有失败

还需要用 update_bitmap_score 更新bitmap的分数

total_bitmap_size += q->bitmap_size;
total_bitmap_entries++;

update_bitmap_score(q);

如果没有产生新bit

1 2	if (!dumb_mode && first_run && !fault && !new_bits) fault = FAULT_NOBITS;

进入最后的收尾的处理阶段

abort_calibration:

  if (new_bits == 2 && !q->has_new_cov)
  {
  // has_new_bits在存在new bit下的返回值就是2
    q->has_new_cov = 1;
    // 有新的覆盖率
    queued_with_cov++;
    // 有新覆盖率的queue加一
  }

  /* Mark variable paths. */

  if (var_detected)
  {
    // 计算可变bytes的数量
    var_byte_count = count_bytes(var_bytes);

    if (!q->var_behavior)
    {
    // 如果可变
      mark_as_variable(q);
      // 通过创建一个variable_behavior文件标记其可变
      queued_variable++;
    }
  }
  // 恢复之前的stage相关的全局变量
  stage_name = old_sn;
  stage_cur = old_sc;
  stage_max = old_sm;

  if (!first_run)
    show_stats();

  return fault;
}

init_forkserver

用来创建一个forkserver，避免频繁的execve

首先创建两个管道，st_pipe 和 ctl_pipe ，分别用于传递状态和命令

EXP_ST void init_forkserver(char **argv)
{

  static struct itimerval it;
  int st_pipe[2], ctl_pipe[2];
  int status;
  s32 rlen;

  ACTF("Spinning up the fork server...");

  if (pipe(st_pipe) || pipe(ctl_pipe))
    PFATAL("pipe() failed");

接下来通过fork产生一个子进程，父进程是fuzzer，子进程是forkserver

1
2
3

forksrv_pid = fork(); 
if (forksrv_pid < 0)
    PFATAL("fork() failed");

通过pid控制子进程进入如下if语句

 if (!forksrv_pid)
 { 
   // 省略部分对于openbsd的特殊处理
   setsid();
   // 通过setsid使得子进程成为一个单独进程组
   dup2(dev_null_fd, 1);
   dup2(dev_null_fd, 2);
   // 将标准输出和标准错误重定向到/dev/null 

// 如果没有设置输出文件 
// 将标准输入重定向到此文件
// 此处笔者还没有搞清楚为什么
   if (out_file)
   {

     dup2(dev_null_fd, 0);
   }
   else
   {

     dup2(out_fd, 0);
     close(out_fd);
   }

接下来设置状态和控制管道的文件描述符

if (dup2(ctl_pipe[0], FORKSRV_FD) < 0)
  PFATAL("dup2() failed");
if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0)
  PFATAL("dup2() failed");

关闭多余描述符

close(ctl_pipe[0]);
close(ctl_pipe[1]);
close(st_pipe[0]);
close(st_pipe[1]);

close(out_dir_fd);
close(dev_null_fd);
close(dev_urandom_fd);
close(fileno(plot_file));

设置延迟绑定

1 2	if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);

设置ASAN相关环境变量

setenv("ASAN_OPTIONS", "abort_on_error=1:"
                       "detect_leaks=0:"
                       "symbolize=0:"
                       "allocator_may_return_null=1",
       0);

设置MSAN相关环境变量

setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                                                          "symbolize=0:"
                                                          "abort_on_error=1:"
                                                          "allocator_may_return_null=1:"
                                                          "msan_track_origins=0",
       0);

通过execv执行子进程

1	execv(target_path, argv);

此时之后目标程序的运行空间会覆盖当前运行时

如果execv失败，通知父进程

1
2
3

  *(u32 *)trace_bits = EXEC_FAIL_SIG;
  exit(0);
}

子进程if结束

父进程通过状态管道读取四个字节，来判断子进程的开始，并针对性完成错误处理

  close(ctl_pipe[0]);
  close(st_pipe[1]);

  fsrv_ctl_fd = ctl_pipe[1];
  fsrv_st_fd = st_pipe[0];

  /* Wait for the fork server to come up, but don't wait too long. */

  it.it_value.tv_sec = ((exec_tmout * FORK_WAIT_MULT) / 1000);
  it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;

  setitimer(ITIMER_REAL, &it, NULL);

  rlen = read(fsrv_st_fd, &status, 4);

  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;

  setitimer(ITIMER_REAL, &it, NULL);

  /* If we have a four-byte "hello" message from the server, we're all set.
     Otherwise, try to figure out what went wrong. */

  if (rlen == 4)
  {
    OKF("All right - fork server is up.");
    return;
  }

  if (child_timed_out)
    FATAL("Timeout while initializing fork server (adjusting -t may help)");

  if (waitpid(forksrv_pid, &status, 0) <= 0)
    PFATAL("waitpid() failed");

  if (WIFSIGNALED(status))
  {

    if (mem_limit && mem_limit < 500 && uses_asan)
    {

      SAYF("\n" cLRD "[-] " cRST
           "Whoops, the target binary crashed suddenly, before receiving any input\n"
           "    from the fuzzer! Since it seems to be built with ASAN and you have a\n"
           "    restrictive memory limit configured, this is expected; please read\n"
           "    %s/notes_for_asan.txt for help.\n",
           doc_path);
    }
    else if (!mem_limit)
    {

      SAYF("\n" cLRD "[-] " cRST
           "Whoops, the target binary crashed suddenly, before receiving any input\n"
           "    from the fuzzer! There are several probable explanations:\n\n"

           "    - The binary is just buggy and explodes entirely on its own. If so, you\n"
           "      need to fix the underlying problem or find a better replacement.\n\n"

#ifdef __APPLE__

           "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
           "      break afl-fuzz performance optimizations when running platform-specific\n"
           "      targets. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

           "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
           "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
    }
    else
    {

      SAYF("\n" cLRD "[-] " cRST
           "Whoops, the target binary crashed suddenly, before receiving any input\n"
           "    from the fuzzer! There are several probable explanations:\n\n"

           "    - The current memory limit (%s) is too restrictive, causing the\n"
           "      target to hit an OOM condition in the dynamic linker. Try bumping up\n"
           "      the limit with the -m setting in the command line. A simple way confirm\n"
           "      this diagnosis would be:\n\n"

#ifdef RLIMIT_AS
           "      ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )\n\n"
#else
           "      ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )\n\n"
#endif /* ^RLIMIT_AS */

           "      Tip: you can use http://jwilk.net/software/recidivm to quickly\n"
           "      estimate the required amount of virtual memory for the binary.\n\n"

           "    - The binary is just buggy and explodes entirely on its own. If so, you\n"
           "      need to fix the underlying problem or find a better replacement.\n\n"

#ifdef __APPLE__

           "    - On MacOS X, the semantics of fork() syscalls are non-standard and may\n"
           "      break afl-fuzz performance optimizations when running platform-specific\n"
           "      targets. To fix this, set AFL_NO_FORKSRV=1 in the environment.\n\n"

#endif /* __APPLE__ */

           "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
           "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
           DMS(mem_limit << 20), mem_limit - 1);
    }

    FATAL("Fork server crashed with signal %d", WTERMSIG(status));
  }

  if (*(u32 *)trace_bits == EXEC_FAIL_SIG)
    FATAL("Unable to execute target application ('%s')", argv[0]);

  if (mem_limit && mem_limit < 500 && uses_asan)
  {

    SAYF("\n" cLRD "[-] " cRST
         "Hmm, looks like the target binary terminated before we could complete a\n"
         "    handshake with the injected code. Since it seems to be built with ASAN and\n"
         "    you have a restrictive memory limit configured, this is expected; please\n"
         "    read %s/notes_for_asan.txt for help.\n",
         doc_path);
  }
  else if (!mem_limit)
  {

    SAYF("\n" cLRD "[-] " cRST
         "Hmm, looks like the target binary terminated before we could complete a\n"
         "    handshake with the injected code. Perhaps there is a horrible bug in the\n"
         "    fuzzer. Poke <lcamtuf@coredump.cx> for troubleshooting tips.\n");
  }
  else
  {

    SAYF("\n" cLRD "[-] " cRST
         "Hmm, looks like the target binary terminated before we could complete a\n"
         "    handshake with the injected code. There are %s probable explanations:\n\n"

         "%s"
         "    - The current memory limit (%s) is too restrictive, causing an OOM\n"
         "      fault in the dynamic linker. This can be fixed with the -m option. A\n"
         "      simple way to confirm the diagnosis may be:\n\n"

#ifdef RLIMIT_AS
         "      ( ulimit -Sv $[%llu << 10]; /path/to/fuzzed_app )\n\n"
#else
         "      ( ulimit -Sd $[%llu << 10]; /path/to/fuzzed_app )\n\n"
#endif /* ^RLIMIT_AS */

         "      Tip: you can use http://jwilk.net/software/recidivm to quickly\n"
         "      estimate the required amount of virtual memory for the binary.\n\n"

         "    - Less likely, there is a horrible bug in the fuzzer. If other options\n"
         "      fail, poke <lcamtuf@coredump.cx> for troubleshooting tips.\n",
         getenv(DEFER_ENV_VAR) ? "three" : "two",
         getenv(DEFER_ENV_VAR) ? "    - You are using deferred forkserver, but __AFL_INIT() is never\n"
                                 "      reached before the program terminates.\n\n"
                               : "",
         DMS(mem_limit << 20), mem_limit - 1);
  }

  FATAL("Fork server handshake failed");
}

run_target

用来运行一次目标

首先初始化了trace_bits，并设置了内存屏障

static u8 run_target(char **argv, u32 timeout)
{

  static struct itimerval it;
  static u32 prev_timed_out = 0;
  static u64 exec_ms = 0;

  int status = 0;
  u32 tb4;

  child_timed_out = 0;

  /* After this memset, trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */

  memset(trace_bits, 0, MAP_SIZE);
  MEM_BARRIER();

如果是dump_mode 并且没有forkserver，就需要先类似init_forkserver 中的部分操作，来创建子进程

  if (dumb_mode == 1 || no_forkserver)
  {

    child_pid = fork();

    if (child_pid < 0)
      PFATAL("fork() failed");

    if (!child_pid)
    {

      struct rlimit r;

      if (mem_limit)
      {

        r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;

#ifdef RLIMIT_AS

        setrlimit(RLIMIT_AS, &r); /* Ignore errors */

#else

        setrlimit(RLIMIT_DATA, &r); /* Ignore errors */

#endif /* ^RLIMIT_AS */
      }

      r.rlim_max = r.rlim_cur = 0;

      setrlimit(RLIMIT_CORE, &r); /* Ignore errors */

      /* Isolate the process and configure standard descriptors. If out_file is
         specified, stdin is /dev/null; otherwise, out_fd is cloned instead. */

      setsid();

      dup2(dev_null_fd, 1);
      dup2(dev_null_fd, 2);

      if (out_file)
      {

        dup2(dev_null_fd, 0);
      }
      else
      {

        dup2(out_fd, 0);
        close(out_fd);
      }

      /* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */

      close(dev_null_fd);
      close(out_dir_fd);
      close(dev_urandom_fd);
      close(fileno(plot_file));

      /* Set sane defaults for ASAN if nothing else specified. */

      setenv("ASAN_OPTIONS", "abort_on_error=1:"
                             "detect_leaks=0:"
                             "symbolize=0:"
                             "allocator_may_return_null=1",
             0);

      setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                                                                "symbolize=0:"
                                                                "msan_track_origins=0",
             0);

      execv(target_path, argv);

      /* Use a distinctive bitmap value to tell the parent about execv()
         falling through. */

      *(u32 *)trace_bits = EXEC_FAIL_SIG;
      exit(0);
    }
  }

反之如果在非dump mode，那么通过控制管道通知子进程运行，并获取其pid

 else
 {

   s32 res;

   /* In non-dumb mode, we have the fork server up and running, so simply
      tell it to have at it, and then read back PID. */

   if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4)
   {
    // 向forkserver发送消息
    if (stop_soon)
       return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
   }

   if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4)
   {
// 接受子进程pid
     if (stop_soon)
       return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
   }

   if (child_pid <= 0)
     FATAL("Fork server is misbehaving (OOM?)");
 }

设置timeout

it.it_value.tv_sec = (timeout / 1000);
it.it_value.tv_usec = (timeout % 1000) * 1000;

setitimer(ITIMER_REAL, &it, NULL);

阻塞，等待子进程运行结束

 if (dumb_mode == 1 || no_forkserver)
 {
// 如果在dumpmode，通过waitpid阻塞
   if (waitpid(child_pid, &status, 0) <= 0)
     PFATAL("waitpid() failed");
 }
 else
 {

   s32 res;
   // 如果存在forkserver 
   // 通过读管道阻塞
   if ((res = read(fsrv_st_fd, &status, 4)) != 4)
   {

     if (stop_soon)
       return 0;
     RPFATAL(res, "Unable to communicate with fork server (OOM?)");
   }
 }

接下来根据子进程返回的status，进行对应的错误处理

  if (!WIFSTOPPED(status))
    child_pid = 0;

  getitimer(ITIMER_REAL, &it);
  exec_ms = (u64)timeout - (it.it_value.tv_sec * 1000 +
                            it.it_value.tv_usec / 1000);
  // 计算运行时间
  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;
   
  setitimer(ITIMER_REAL, &it, NULL);

  total_execs++;
  // 总运行次数加一
  /* Any subsequent operations on trace_bits must not be moved by the
     compiler below this point. Past this location, trace_bits[] behave
     very normally and do not have to be treated as volatile. */

  MEM_BARRIER();

  tb4 = *(u32 *)trace_bits;
 
#ifdef WORD_SIZE_64
  classify_counts((u64 *)trace_bits);
#else
  classify_counts((u32 *)trace_bits);
#endif /* ^WORD_SIZE_64 */
  
  prev_timed_out = child_timed_out;

  /* Report outcome to caller. */

  if (WIFSIGNALED(status) && !stop_soon)
  {
  // 根据信号判断错误类型
    kill_signal = WTERMSIG(status);

    if (child_timed_out && kill_signal == SIGKILL)
      return FAULT_TMOUT;

    return FAULT_CRASH;
  }

  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
     must use a special exit code. */

  if (uses_asan && WEXITSTATUS(status) == MSAN_ERROR)
  {
  // 根据exitstatus判断错误类型
    kill_signal = 0;
    return FAULT_CRASH;
  }

  if ((dumb_mode == 1 || no_forkserver) && tb4 == EXEC_FAIL_SIG)
    return FAULT_ERROR;

  /* It makes sense to account for the slowest units only if the testcase was run
  under the user defined timeout. */
  if (!(timeout > exec_tmout) && (slowest_exec_ms < exec_ms))
  {
    slowest_exec_ms = exec_ms;
  }
  // 如果顺利运行到最后，说明没有错误
  return FAULT_NONE;
}

update_bitmap_score

这部分涉及到了AFL维护的一个static struct queue_entry *top_rated[MAP_SIZE]
数组，这个数组记录了每个bitmap中的一项（也就是每个基本块）对应的最favored的testcase。

这个favored score由执行时间和长度相乘得到。


static void update_bitmap_score(struct queue_entry *q)
{

  u32 i;
  u64 fav_factor = q->exec_us * q->len;

  /* For every byte set in trace_bits[], see if there is a previous winner,
     and how it compares to us. */

  for (i = 0; i < MAP_SIZE; i++)

    if (trace_bits[i])
    {

      if (top_rated[i])
      {

        /* Faster-executing or smaller test cases are favored. */
		// favored score由执行时间和长度相乘得到。越小越好
        if (fav_factor > top_rated[i]->exec_us * top_rated[i]->len)
          continue;

        /* Looks like we're going to win. Decrease ref count for the
           previous winner, discard its trace_bits[] if necessary. */

        if (!--top_rated[i]->tc_ref)
        {
          ck_free(top_rated[i]->trace_mini);
          top_rated[i]->trace_mini = 0;
        }
      }

      /* Insert ourselves as the new winner. */

      top_rated[i] = q;
      q->tc_ref++;
	  // 如果更favored，则更新top_rated数组
      if (!q->trace_mini)
      {
        q->trace_mini = ck_alloc(MAP_SIZE >> 3);
        minimize_bits(q->trace_mini, trace_bits);
        // 压缩trace_bits为bitmap
      }

      score_changed = 1;
      // 设置flag为1
    }
}

cull_queue | 挑选更好的种子

此函数通过标记更favored 的种子，使得favored的种子得到更大的运行概率

static void cull_queue(void)
{

  struct queue_entry *q;
  static u8 temp_v[MAP_SIZE >> 3];
  u32 i;

  if (dumb_mode || !score_changed)
    return;

  score_changed = 0;

  memset(temp_v, 255, MAP_SIZE >> 3);

  queued_favored = 0;
  pending_favored = 0;

  q = queue;

首先清空每个queue实体的favored

while (q)
{
  q->favored = 0;
  q = q->next;
}

tmep_v数组用来标识没有遍历到的区域，以下循环将所有存在不同分支的种子筛选出来，

 for (i = 0; i < MAP_SIZE; i++)
   if (top_rated[i] && (temp_v[i >> 3] & (1 << (i & 7))))
   {
// 判断favored种子遍历的区域，是否已经在之前筛选出了（将对应的temp_v置为0了）
     u32 j = MAP_SIZE >> 3;

     /* Remove all bits belonging to the current entry from temp_v. */
  // 然后将所有当前种子遍历过的区域从temp_v中去除
     while (j--)
       if (top_rated[i]->trace_mini[j])
         temp_v[j] &= ~top_rated[i]->trace_mini[j];

     top_rated[i]->favored = 1;
     // 然后增加其favored值
     queued_favored++;

     if (!top_rated[i]->was_fuzzed)
       pending_favored++;
   }


q = queue;
// 对于不favored的，通过创建redundant文件的方式表明此种子是多余的
while (q)
{
  mark_as_redundant(q, !q->favored);
  q = q->next;
}

fuzz_one | 种子的变异

此函数用于从queue中选取一个种子，对种子进行变异，返回0说明运行成功，否则运行失败

首先进行了一些细节的处理, 包括:

首先判断是否要跳过当前testcase给favored的testcase更多运行机会
- 如果存在 pending_favored ，并且当前queue已经运行过或者不favored，那么为了将时间留给pending_favored的testcase, 有99%的几率直接跳过当前种子
- 如果无pending_favored，对于不是favored的testcase, 如果已经fuzz过, 95%概率跳过, 如果没有fuzz过, 75%概率跳过
- 如果是favored，不跳过

static u8 fuzz_one(char **argv)
{

  s32 len, fd, temp_len, i, j;
  u8 *in_buf, *out_buf, *orig_in, *ex_tmp, *eff_map = 0;
  u64 havoc_queued, orig_hit_cnt, new_hit_cnt;
  u32 splice_cycle = 0, perf_score = 100, orig_perf, prev_cksum, eff_cnt = 1;

  u8 ret_val = 1, doing_det = 0;

  u8 a_collect[MAX_AUTO_EXTRA];
  u32 a_len = 0;

#ifdef IGNORE_FINDS

  /* In IGNORE_FINDS mode, skip any entries that weren't in the
     initial data set. */

  if (queue_cur->depth > 1)
    return 1;

#else

  if (pending_favored)
  {

    /* If we have any favored, non-fuzzed new arrivals in the queue,
       possibly skip to them at the expense of already-fuzzed or non-favored
       cases. */

    if ((queue_cur->was_fuzzed || !queue_cur->favored) &&
        UR(100) < SKIP_TO_NEW_PROB)
      return 1;
  }
  else if (!dumb_mode && !queue_cur->favored && queued_paths > 10)
  {

    /* Otherwise, still possibly skip non-favored cases, albeit less often.
       The odds of skipping stuff are higher for already-fuzzed inputs and
       lower for never-fuzzed entries. */

    if (queue_cycle > 1 && !queue_cur->was_fuzzed)
    {

      if (UR(100) < SKIP_NFAV_NEW_PROB)
      // random(0, 100) < 75 ; 75%
        return 1;
    }
    else
    {

      if (UR(100) < SKIP_NFAV_OLD_PROB)
      // random(0, 100) < 95 ; 95%
        return 1;
    }
  }
  // 判断需要跳过的情形

#endif /* ^IGNORE_FINDS */

  if (not_on_tty)
  {
    ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
         current_entry, queued_paths, unique_crashes);
    fflush(stdout);
  }

  /* Map the test case into memory. */
  // 将testcase映射进内存
  fd = open(queue_cur->fname, O_RDONLY);

  if (fd < 0)
    PFATAL("Unable to open '%s'", queue_cur->fname);

  len = queue_cur->len;

  orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

  if (orig_in == MAP_FAILED)
    PFATAL("Unable to mmap '%s'", queue_cur->fname);

  close(fd);

  /* We could mmap() out_buf as MAP_PRIVATE, but we end up clobbering every
     single byte anyway, so it wouldn't give us any performance or memory usage
     benefits. */

  out_buf = ck_alloc_nozero(len);

  subseq_tmouts = 0;

  cur_depth = queue_cur->depth;

如果之前cal_failed，那么要再运行一次 calibrate_case 来校准testcase

if (queue_cur->cal_failed)
{

  u8 res = FAULT_TMOUT;

  if (queue_cur->cal_failed < CAL_CHANCES)
  {

    /* Reset exec_cksum to tell calibrate_case to re-execute the testcase
       avoiding the usage of an invalid trace_bits.
       For more info: https://github.com/AFLplusplus/AFLplusplus/pull/425 */

    queue_cur->exec_cksum = 0;

    res = calibrate_case(argv, queue_cur, in_buf, queue_cycle - 1, 0);

    if (res == FAULT_ERROR)
      FATAL("Unable to execute target application");
  }

  if (stop_soon || res != crash_mode)
  {
    cur_skipped_paths++;
    goto abandon_entry;
  }
}

接下来通过trim_case 来修剪并运行testcase

if (!dumb_mode && !queue_cur->trim_done)
{

  u8 res = trim_case(argv, queue_cur, in_buf);

  if (res == FAULT_ERROR)
    FATAL("Unable to execute target application");

  if (stop_soon)
  {
    cur_skipped_paths++;
    goto abandon_entry;
  }

  /* Don't retry trimming, even if it failed. */

  queue_cur->trim_done = 1;

  if (len != queue_cur->len)
    len = queue_cur->len;
}

memcpy(out_buf, in_buf, len);

通过calculate_score 计算分数

1	orig_perf = perf_score = calculate_score(queue_cur);

接下来进入真正的变异阶段

确定性变异

首先判断是否需要跳过确定性(deterministic)变异阶段，这部分变异没有随机性，是所有种子都要经历的阶段

if (skip_deterministic || queue_cur->was_fuzzed || queue_cur->passed_det)
  goto havoc_stage;

/* Skip deterministic fuzzing if exec path checksum puts this out of scope
   for this master instance. */

if (master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
  goto havoc_stage;

doing_det = 1;

deterministic 阶段分为以下几个部分：

bitflip

bitflip阶段是对于testcase的bit位进行翻转

bitflip 1/1

通过每次翻转一个bit，来检查是否具有类似于 “ELF” 此类魔数。

 stage_short = "flip1";
 stage_max = len << 3;
 stage_name = "bitflip 1/1";

 stage_val_type = STAGE_VAL_NONE;

 orig_hit_cnt = queued_paths + unique_crashes;
 
 prev_cksum = queue_cur->exec_cksum;
 
 for (stage_cur = 0; stage_cur < stage_max; stage_cur++)
 {

   stage_cur_byte = stage_cur >> 3;

   FLIP_BIT(out_buf, stage_cur);
// 每次翻转一个 bit
   if (common_fuzz_stuff(argv, out_buf, len))
   // 运行一次fuzz测试
     goto abandon_entry;

   FLIP_BIT(out_buf, stage_cur);
// 翻转回来
   if (!dumb_mode && (stage_cur & 7) == 7)
   {
// 根据经验，通常检查最低位的翻转最有效率
     u32 cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
  // 获取cksum
     if (stage_cur == stage_max - 1 && cksum == prev_cksum)
     {

       /* If at end of file and we are still collecting a string, grab the
          final character and force output. */

       if (a_len < MAX_AUTO_EXTRA)
         a_collect[a_len] = out_buf[stage_cur >> 3];
       a_len++;

       if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
         maybe_add_auto(a_collect, a_len);
     }
     else if (cksum != prev_cksum)
     {
  // 如果cksum不等于prev_cksum，可能是一个魔数的开始或者结束
       /* Otherwise, if the checksum has changed, see if we have something
          worthwhile queued up, and collect that if the answer is yes. */

       if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
         maybe_add_auto(a_collect, a_len);
	// 如果是一个魔数的结束
	// 那么调用 may_add_auto收集起来
       a_len = 0;
       prev_cksum = cksum;
     }

     /* Continue collecting string, but only if the bit flip actually made
        any difference - we don't want no-op tokens. */

     if (cksum != queue_cur->exec_cksum)
     {
	// 需要cksum不等于原来才需要增加a_len并记录
       if (a_len < MAX_AUTO_EXTRA)
         a_collect[a_len] = out_buf[stage_cur >> 3];
       a_len++;
     }
   }
 }

bitflip 2/1

每次翻转两个bit，运行并保留有价值的种子

stage_name = "bitflip 2/1";
stage_short = "flip2";
stage_max = (len << 3) - 1;

orig_hit_cnt = new_hit_cnt;

for (stage_cur = 0; stage_cur < stage_max; stage_cur++)
{

  stage_cur_byte = stage_cur >> 3;

  FLIP_BIT(out_buf, stage_cur);
  FLIP_BIT(out_buf, stage_cur + 1);
  // 翻转两个bit
  if (common_fuzz_stuff(argv, out_buf, len))
    goto abandon_entry;

  FLIP_BIT(out_buf, stage_cur);
  FLIP_BIT(out_buf, stage_cur + 1);
}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_FLIP2] += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_FLIP2] += stage_max;

bitflip 4/1

每次翻转4个bit，运行并保留有价值的种子

stage_name = "bitflip 4/1";
stage_short = "flip4";
stage_max = (len << 3) - 3;

orig_hit_cnt = new_hit_cnt;

for (stage_cur = 0; stage_cur < stage_max; stage_cur++)
{

  stage_cur_byte = stage_cur >> 3;

  FLIP_BIT(out_buf, stage_cur);
  FLIP_BIT(out_buf, stage_cur + 1);
  FLIP_BIT(out_buf, stage_cur + 2);
  FLIP_BIT(out_buf, stage_cur + 3);

  if (common_fuzz_stuff(argv, out_buf, len))
    goto abandon_entry;

  FLIP_BIT(out_buf, stage_cur);
  FLIP_BIT(out_buf, stage_cur + 1);
  FLIP_BIT(out_buf, stage_cur + 2);
  FLIP_BIT(out_buf, stage_cur + 3);
}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_FLIP4] += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_FLIP4] += stage_max;

bitflip 8/8

每次反转一整个byte，并记录那些即使全部翻转也对执行路径没有影响的byte，避免在之后花费时间去测试

/* Walking byte. */

stage_name = "bitflip 8/8";
stage_short = "flip8";
stage_max = len;

orig_hit_cnt = new_hit_cnt;

for (stage_cur = 0; stage_cur < stage_max; stage_cur++)
{

  stage_cur_byte = stage_cur;

  out_buf[stage_cur] ^= 0xFF;
  // 每次翻转一个byte
  if (common_fuzz_stuff(argv, out_buf, len))
    goto abandon_entry;
  // 运行测试
  /* We also use this stage to pull off a simple trick: we identify
     bytes that seem to have no effect on the current execution path
     even when fully flipped - and we skip them during more expensive
     deterministic stages, such as arithmetics or known ints. */

  if (!eff_map[EFF_APOS(stage_cur)])
  {

    u32 cksum;

    /* If in dumb mode or if the file is very short, just flag everything
       without wasting time on checksums. */

    if (!dumb_mode && len >= EFF_MIN_LEN)
      cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
    else
      cksum = ~queue_cur->exec_cksum;

    if (cksum != queue_cur->exec_cksum)
    {
    // 用来区分一些无效byte，为后面的阶段做准备
      eff_map[EFF_APOS(stage_cur)] = 1;
      eff_cnt++;
      // 通过一个eff_map 来标记有效byte
    }
  }

  out_buf[stage_cur] ^= 0xFF;
  // 还原byte
}

/* If the effector map is more than EFF_MAX_PERC dense, just flag the
   whole thing as worth fuzzing, since we wouldn't be saving much time
   anyway. */

if (eff_cnt != EFF_ALEN(len) &&
    eff_cnt * 100 / EFF_ALEN(len) > EFF_MAX_PERC)
{

  // 如果eff_map 大于 EFF_MAX_PERC 
  // 那么直接把整个testcase标记为值得fuzz的，这不会多浪费多少时间
  memset(eff_map, 1, EFF_ALEN(len));

  blocks_eff_select += EFF_ALEN(len);
}
else
{

  blocks_eff_select += eff_cnt;
}

blocks_eff_total += EFF_ALEN(len);

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_FLIP8] += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_FLIP8] += stage_max;

接下来是 bitflip 16/8

处理类似

if (len < 2) goto skip_bitflip;

stage_name  = "bitflip 16/8";
stage_short = "flip16";
stage_cur   = 0;
stage_max   = len - 1;

orig_hit_cnt = new_hit_cnt;

for (i = 0; i < len - 1; i++) {

  /* Let's consult the effector map... */

  if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)]) {
    stage_max--;
    continue;
  }

  stage_cur_byte = i;

  *(u16*)(out_buf + i) ^= 0xFFFF;

  if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
  stage_cur++;

  *(u16*)(out_buf + i) ^= 0xFFFF;


}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_FLIP16]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_FLIP16] += stage_max;

然后是 bitflip 32/8，逻辑相同

if (len < 4) goto skip_bitflip;

/* Four walking bytes. */

stage_name  = "bitflip 32/8";
stage_short = "flip32";
stage_cur   = 0;
stage_max   = len - 3;

orig_hit_cnt = new_hit_cnt;

for (i = 0; i < len - 3; i++) {

  /* Let's consult the effector map... */
  if (!eff_map[EFF_APOS(i)] && !eff_map[EFF_APOS(i + 1)] &&
      !eff_map[EFF_APOS(i + 2)] && !eff_map[EFF_APOS(i + 3)]) {
    stage_max--;
    continue;
  }

  stage_cur_byte = i;

  *(u32*)(out_buf + i) ^= 0xFFFFFFFF;

  if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
  stage_cur++;

  *(u32*)(out_buf + i) ^= 0xFFFFFFFF;

}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_FLIP32]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_FLIP32] += stage_max;

以上，第一个bitflip阶段就完成了

ARITHMETIC INC/DEC

这个阶段是算数加减阶段

首先是 arith 8/8 ，对一个byte大小的数据进行加减

 stage_name  = "arith 8/8";
 stage_short = "arith8";
 stage_cur   = 0;
 stage_max   = 2 * len * ARITH_MAX;

 stage_val_type = STAGE_VAL_LE;

 orig_hit_cnt = new_hit_cnt;

 for (i = 0; i < len; i++) {

   u8 orig = out_buf[i];
	
   /* Let's consult the effector map... */

   if (!eff_map[EFF_APOS(i)]) {
     stage_max -= 2 * ARITH_MAX;
     continue;
     // 如果不是有效位置，那么就避免进行变异
   }

   stage_cur_byte = i;

   for (j = 1; j <= ARITH_MAX; j++) {
   // 这里的 ARITH_MAX 是35
     u8 r = orig ^ (orig + j);
	
     /* Do arithmetic operations only if the result couldn't be a product
        of a bitflip. */
     // 并且要确保进行算术运算后的值不可以经过bitflip得到，避免重复变异
     if (!could_be_bitflip(r)) {

       stage_cur_val = j;
       out_buf[i] = orig + j;

       if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
       stage_cur++;

     } else stage_max--;

     r =  orig ^ (orig - j);

     if (!could_be_bitflip(r)) {

       stage_cur_val = -j;
       out_buf[i] = orig - j;

       if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;
       stage_cur++;

     } else stage_max--;

     out_buf[i] = orig;
// 加减法都尝试一次
   }

 }

 new_hit_cnt = queued_paths + unique_crashes;

 stage_finds[STAGE_ARITH8]  += new_hit_cnt - orig_hit_cnt;
 stage_cycles[STAGE_ARITH8] += stage_max;

然后还有

arith 16/8
arith 32/8
分别进行16位和32位的加减，这里不过多赘述

INTERESTING VALUES

这一步主要是使用一些有意义的值来替换

首先是 interest 8/8

用interest值替换一个8位

stage_name  = "interest 8/8";
stage_short = "int8";
stage_cur   = 0;
stage_max   = len * sizeof(interesting_8);

stage_val_type = STAGE_VAL_LE;

orig_hit_cnt = new_hit_cnt;

/* Setting 8-bit integers. */

for (i = 0; i < len; i++) {

  u8 orig = out_buf[i];

  /* Let's consult the effector map... */

  if (!eff_map[EFF_APOS(i)]) {
    stage_max -= sizeof(interesting_8);
    continue;
  }

  stage_cur_byte = i;

  for (j = 0; j < sizeof(interesting_8); j++) {

    /* Skip if the value could be a product of bitflips or arithmetics. */

    if (could_be_bitflip(orig ^ (u8)interesting_8[j]) ||
        could_be_arith(orig, (u8)interesting_8[j], 1)) {
      stage_max--;
      continue;
    }

    stage_cur_val = interesting_8[j];
    out_buf[i] = interesting_8[j];

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

    out_buf[i] = orig;
    stage_cur++;

  }

}

同样的，还有:

interest 16/8
interest 32/8

DICTIONARY STUFF

这一阶段是使用字典或者之前得到的有意义的extras替换种子的内容

首先是替换为extras

/********************
 * DICTIONARY STUFF *
 ********************/

if (!extras_cnt) goto skip_user_extras;

/* Overwrite with user-supplied extras. */

stage_name  = "user extras (over)";
stage_short = "ext_UO";
stage_cur   = 0;
stage_max   = extras_cnt * len;

stage_val_type = STAGE_VAL_NONE;

orig_hit_cnt = new_hit_cnt;

for (i = 0; i < len; i++) {

  u32 last_len = 0;

  stage_cur_byte = i;

  /* Extras are sorted by size, from smallest to largest. This means
     that we don't have to worry about restoring the buffer in
     between writes at a particular offset determined by the outer
     loop. */

  for (j = 0; j < extras_cnt; j++) {

    /* Skip extras probabilistically if extras_cnt > MAX_DET_EXTRAS. Also
       skip them if there's no room to insert the payload, if the token
       is redundant, or if its entire span has no bytes set in the effector
       map. */

    if ((extras_cnt > MAX_DET_EXTRAS && UR(extras_cnt) >= MAX_DET_EXTRAS) ||
        extras[j].len > len - i ||
        !memcmp(extras[j].data, out_buf + i, extras[j].len) ||
        !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, extras[j].len))) {

      stage_max--;
      continue;

    }

    last_len = extras[j].len;
    memcpy(out_buf + i, extras[j].data, last_len);

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

    stage_cur++;

  }

  /* Restore all the clobbered memory. */
  memcpy(out_buf + i, in_buf + i, last_len);

}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_EXTRAS_UO]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_EXTRAS_UO] += stage_max;

或者插入extras

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_EXTRAS_UO]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_EXTRAS_UO] += stage_max;

/* Insertion of user-supplied extras. */

stage_name  = "user extras (insert)";
stage_short = "ext_UI";
stage_cur   = 0;
stage_max   = extras_cnt * (len + 1);

orig_hit_cnt = new_hit_cnt;

ex_tmp = ck_alloc(len + MAX_DICT_FILE);

for (i = 0; i <= len; i++) {

  stage_cur_byte = i;

  for (j = 0; j < extras_cnt; j++) {

    if (len + extras[j].len > MAX_FILE) {
      stage_max--; 
      continue;
    }

    /* Insert token */
    memcpy(ex_tmp + i, extras[j].data, extras[j].len);

    /* Copy tail */
    memcpy(ex_tmp + i + extras[j].len, out_buf + i, len - i);

    if (common_fuzz_stuff(argv, ex_tmp, len + extras[j].len)) {
      ck_free(ex_tmp);
      goto abandon_entry;
    }

    stage_cur++;

  }

  /* Copy head */
  ex_tmp[i] = out_buf[i];

}

ck_free(ex_tmp);

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_EXTRAS_UI]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_EXTRAS_UI] += stage_max;

最后尝试之前变异阶段得到的extras:

if (!a_extras_cnt) goto skip_extras;

stage_name  = "auto extras (over)";
stage_short = "ext_AO";
stage_cur   = 0;
stage_max   = MIN(a_extras_cnt, USE_AUTO_EXTRAS) * len;

stage_val_type = STAGE_VAL_NONE;

orig_hit_cnt = new_hit_cnt;

for (i = 0; i < len; i++) {

  u32 last_len = 0;

  stage_cur_byte = i;

  for (j = 0; j < MIN(a_extras_cnt, USE_AUTO_EXTRAS); j++) {

    /* See the comment in the earlier code; extras are sorted by size. */

    if (a_extras[j].len > len - i ||
        !memcmp(a_extras[j].data, out_buf + i, a_extras[j].len) ||
        !memchr(eff_map + EFF_APOS(i), 1, EFF_SPAN_ALEN(i, a_extras[j].len))) {

      stage_max--;
      continue;

    }

    last_len = a_extras[j].len;
    memcpy(out_buf + i, a_extras[j].data, last_len);

    if (common_fuzz_stuff(argv, out_buf, len)) goto abandon_entry;

    stage_cur++;

  }

  /* Restore all the clobbered memory. */
  memcpy(out_buf + i, in_buf + i, last_len);

}

new_hit_cnt = queued_paths + unique_crashes;

stage_finds[STAGE_EXTRAS_AO]  += new_hit_cnt - orig_hit_cnt;
stage_cycles[STAGE_EXTRAS_AO] += stage_max;

Havoc Stage

havoc是不确定的大变异

首先，由于splice阶段也会进行havoc，所以要进行区分此时是直接运行的havoc还是splice阶段运行的

stage_cur_byte = -1;

/* The havoc stage mutation code is also invoked when splicing files; if the
   splice_cycle variable is set, generate different descriptions and such. */

if (!splice_cycle) {

  stage_name  = "havoc";
  stage_short = "havoc";
  stage_max   = (doing_det ? HAVOC_CYCLES_INIT : HAVOC_CYCLES) *
                perf_score / havoc_div / 100;

} else {

  static u8 tmp[32];

  perf_score = orig_perf;

  sprintf(tmp, "splice %u", splice_cycle);
  stage_name  = tmp;
  stage_short = "splice";
  stage_max   = SPLICE_HAVOC * perf_score / havoc_div / 100;

}

if (stage_max < HAVOC_MIN) stage_max = HAVOC_MIN;

temp_len = len;

orig_hit_cnt = queued_paths + unique_crashes;

havoc_queued = queued_paths;

接下来是一系列变异循环：

首先，这里有两个循环，外层循环控制测试运行次数，内层循环控制变异个数

在内层循环中，通过随机数来选择一种变异策略，策略包括翻转、加减、随机插入等等

在经过n次随机变异后，再通过common_fuzz_stuff 运行测试

for (stage_cur = 0; stage_cur < stage_max; stage_cur++) {

  u32 use_stacking = 1 << (1 + UR(HAVOC_STACK_POW2));

  stage_cur_val = use_stacking;
 
  for (i = 0; i < use_stacking; i++) {

    switch (UR(15 + ((extras_cnt + a_extras_cnt) ? 2 : 0))) {

      case 0:

        /* Flip a single bit somewhere. Spooky! */

        FLIP_BIT(out_buf, UR(temp_len << 3));
        break;

      case 1: 

        /* Set byte to interesting value. */

        out_buf[UR(temp_len)] = interesting_8[UR(sizeof(interesting_8))];
        break;

      case 2:

        /* Set word to interesting value, randomly choosing endian. */

        if (temp_len < 2) break;

        if (UR(2)) {

          *(u16*)(out_buf + UR(temp_len - 1)) =
            interesting_16[UR(sizeof(interesting_16) >> 1)];

        } else {

          *(u16*)(out_buf + UR(temp_len - 1)) = SWAP16(
            interesting_16[UR(sizeof(interesting_16) >> 1)]);

        }

        break;

      case 3:

        /* Set dword to interesting value, randomly choosing endian. */

        if (temp_len < 4) break;

        if (UR(2)) {

          *(u32*)(out_buf + UR(temp_len - 3)) =
            interesting_32[UR(sizeof(interesting_32) >> 2)];

        } else {

          *(u32*)(out_buf + UR(temp_len - 3)) = SWAP32(
            interesting_32[UR(sizeof(interesting_32) >> 2)]);

        }

        break;

      case 4:

        /* Randomly subtract from byte. */

        out_buf[UR(temp_len)] -= 1 + UR(ARITH_MAX);
        break;

      case 5:

        /* Randomly add to byte. */

        out_buf[UR(temp_len)] += 1 + UR(ARITH_MAX);
        break;

      case 6:

        /* Randomly subtract from word, random endian. */

        if (temp_len < 2) break;

        if (UR(2)) {

          u32 pos = UR(temp_len - 1);

          *(u16*)(out_buf + pos) -= 1 + UR(ARITH_MAX);

        } else {

          u32 pos = UR(temp_len - 1);
          u16 num = 1 + UR(ARITH_MAX);

          *(u16*)(out_buf + pos) =
            SWAP16(SWAP16(*(u16*)(out_buf + pos)) - num);

        }

        break;

      case 7:

        /* Randomly add to word, random endian. */

        if (temp_len < 2) break;

        if (UR(2)) {

          u32 pos = UR(temp_len - 1);

          *(u16*)(out_buf + pos) += 1 + UR(ARITH_MAX);

        } else {

          u32 pos = UR(temp_len - 1);
          u16 num = 1 + UR(ARITH_MAX);

          *(u16*)(out_buf + pos) =
            SWAP16(SWAP16(*(u16*)(out_buf + pos)) + num);

        }

        break;

      case 8:

        /* Randomly subtract from dword, random endian. */

        if (temp_len < 4) break;

        if (UR(2)) {

          u32 pos = UR(temp_len - 3);

          *(u32*)(out_buf + pos) -= 1 + UR(ARITH_MAX);

        } else {

          u32 pos = UR(temp_len - 3);
          u32 num = 1 + UR(ARITH_MAX);

          *(u32*)(out_buf + pos) =
            SWAP32(SWAP32(*(u32*)(out_buf + pos)) - num);

        }

        break;

      case 9:

        /* Randomly add to dword, random endian. */

        if (temp_len < 4) break;

        if (UR(2)) {

          u32 pos = UR(temp_len - 3);

          *(u32*)(out_buf + pos) += 1 + UR(ARITH_MAX);

        } else {

          u32 pos = UR(temp_len - 3);
          u32 num = 1 + UR(ARITH_MAX);

          *(u32*)(out_buf + pos) =
            SWAP32(SWAP32(*(u32*)(out_buf + pos)) + num);

        }

        break;

      case 10:

        /* Just set a random byte to a random value. Because,
           why not. We use XOR with 1-255 to eliminate the
           possibility of a no-op. */

        out_buf[UR(temp_len)] ^= 1 + UR(255);
        break;

      case 11 ... 12: {

          /* Delete bytes. We're making this a bit more likely
             than insertion (the next option) in hopes of keeping
             files reasonably small. */

          u32 del_from, del_len;

          if (temp_len < 2) break;

          /* Don't delete too much. */

          del_len = choose_block_len(temp_len - 1);

          del_from = UR(temp_len - del_len + 1);

          memmove(out_buf + del_from, out_buf + del_from + del_len,
                  temp_len - del_from - del_len);

          temp_len -= del_len;

          break;

        }

      case 13:

        if (temp_len + HAVOC_BLK_XL < MAX_FILE) {

          /* Clone bytes (75%) or insert a block of constant bytes (25%). */

          u8  actually_clone = UR(4);
          u32 clone_from, clone_to, clone_len;
          u8* new_buf;

          if (actually_clone) {

            clone_len  = choose_block_len(temp_len);
            clone_from = UR(temp_len - clone_len + 1);

          } else {

            clone_len = choose_block_len(HAVOC_BLK_XL);
            clone_from = 0;

          }

          clone_to   = UR(temp_len);

          new_buf = ck_alloc_nozero(temp_len + clone_len);

          /* Head */

          memcpy(new_buf, out_buf, clone_to);

          /* Inserted part */

          if (actually_clone)
            memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
          else
            memset(new_buf + clone_to,
                   UR(2) ? UR(256) : out_buf[UR(temp_len)], clone_len);

          /* Tail */
          memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
                 temp_len - clone_to);

          ck_free(out_buf);
          out_buf = new_buf;
          temp_len += clone_len;

        }

        break;

      case 14: {

          /* Overwrite bytes with a randomly selected chunk (75%) or fixed
             bytes (25%). */

          u32 copy_from, copy_to, copy_len;

          if (temp_len < 2) break;

          copy_len  = choose_block_len(temp_len - 1);

          copy_from = UR(temp_len - copy_len + 1);
          copy_to   = UR(temp_len - copy_len + 1);

          if (UR(4)) {

            if (copy_from != copy_to)
              memmove(out_buf + copy_to, out_buf + copy_from, copy_len);

          } else memset(out_buf + copy_to,
                        UR(2) ? UR(256) : out_buf[UR(temp_len)], copy_len);

          break;

        }

      /* Values 15 and 16 can be selected only if there are any extras
         present in the dictionaries. */

      case 15: {

          /* Overwrite bytes with an extra. */

          if (!extras_cnt || (a_extras_cnt && UR(2))) {

            /* No user-specified extras or odds in our favor. Let's use an
               auto-detected one. */

            u32 use_extra = UR(a_extras_cnt);
            u32 extra_len = a_extras[use_extra].len;
            u32 insert_at;

            if (extra_len > temp_len) break;

            insert_at = UR(temp_len - extra_len + 1);
            memcpy(out_buf + insert_at, a_extras[use_extra].data, extra_len);

          } else {

            /* No auto extras or odds in our favor. Use the dictionary. */

            u32 use_extra = UR(extras_cnt);
            u32 extra_len = extras[use_extra].len;
            u32 insert_at;

            if (extra_len > temp_len) break;

            insert_at = UR(temp_len - extra_len + 1);
            memcpy(out_buf + insert_at, extras[use_extra].data, extra_len);

          }

          break;

        }

      case 16: {

          u32 use_extra, extra_len, insert_at = UR(temp_len + 1);
          u8* new_buf;

          /* Insert an extra. Do the same dice-rolling stuff as for the
             previous case. */

          if (!extras_cnt || (a_extras_cnt && UR(2))) {

            use_extra = UR(a_extras_cnt);
            extra_len = a_extras[use_extra].len;

            if (temp_len + extra_len >= MAX_FILE) break;

            new_buf = ck_alloc_nozero(temp_len + extra_len);

            /* Head */
            memcpy(new_buf, out_buf, insert_at);

            /* Inserted part */
            memcpy(new_buf + insert_at, a_extras[use_extra].data, extra_len);

          } else {

            use_extra = UR(extras_cnt);
            extra_len = extras[use_extra].len;

            if (temp_len + extra_len >= MAX_FILE) break;

            new_buf = ck_alloc_nozero(temp_len + extra_len);

            /* Head */
            memcpy(new_buf, out_buf, insert_at);

            /* Inserted part */
            memcpy(new_buf + insert_at, extras[use_extra].data, extra_len);

          }

          /* Tail */
          memcpy(new_buf + insert_at + extra_len, out_buf + insert_at,
                 temp_len - insert_at);

          ck_free(out_buf);
          out_buf   = new_buf;
          temp_len += extra_len;

          break;

        }

    }

  }

  if (common_fuzz_stuff(argv, out_buf, temp_len))
    goto abandon_entry;

  /* out_buf might have been mangled a bit, so let's restore it to its
     original size and shape. */

  if (temp_len < len) out_buf = ck_realloc(out_buf, len);
  temp_len = len;
  memcpy(out_buf, in_buf, len);

  /* If we're finding new stuff, let's run for a bit longer, limits
     permitting. */

  if (queued_paths != havoc_queued) {

    if (perf_score <= HAVOC_MAX_MULT * 100) {
      stage_max  *= 2;
      perf_score *= 2;
    }

    havoc_queued = queued_paths;

  }

}

Splice Stage

这一部分是铰接阶段，用来将几个testcase的不同部分拼接在一起，并在之后通过havoc阶段进行变异

 if (use_splicing && splice_cycle++ < SPLICE_CYCLES &&
     queued_paths > 1 && queue_cur->len > 1) {

   struct queue_entry* target;
   u32 tid, split_at;
   u8* new_buf;
   s32 f_diff, l_diff;

   /* First of all, if we've modified in_buf for havoc, let's clean that
      up... */

   if (in_buf != orig_in) {
     ck_free(in_buf);
     in_buf = orig_in;
     len = queue_cur->len;
   }
   // 首先为了havoc清理in_buf

   /* Pick a random queue entry and seek to it. Don't splice with yourself. */

   do { tid = UR(queued_paths); } while (tid == current_entry);
   // 选择一个随机queue内实例
   splicing_with = tid;
   target = queue;

   while (tid >= 100) { target = target->next_100; tid -= 100; }
   while (tid--) target = target->next;
   
   /* Make sure that the target has a reasonable length. */

   while (target && (target->len < 2 || target == queue_cur)) {
     target = target->next;
     splicing_with++;
   }
   // 对长度的检查
   if (!target) goto retry_splicing;
// 如果直到遍历到最后都没有找到适合长度的，就重试
   /* Read the testcase into a new buffer. */

   fd = open(target->fname, O_RDONLY);
   
   if (fd < 0) PFATAL("Unable to open '%s'", target->fname);

   new_buf = ck_alloc_nozero(target->len);

   ck_read(fd, new_buf, target->len, target->fname);

   close(fd);

   /* Find a suitable splicing location, somewhere between the first and
      the last differing byte. Bail out if the difference is just a single
      byte or so. */

   locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
   // 找到适合的拼接位置， 
   // 首先找到第一个和最后一个不同的byte之间，并且避免只是单byte的不同
   if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
     ck_free(new_buf);
     goto retry_splicing;
   }

   /* Split somewhere between the first and last differing byte. */
   // 然后在这个区间随机选择一个位置来进行拼接 
   split_at = f_diff + UR(l_diff - f_diff);
   
   /* Do the thing. */

   len = target->len;
   memcpy(new_buf, in_buf, split_at);
   in_buf = new_buf;

   ck_free(out_buf);
   out_buf = ck_alloc_nozero(len);
   memcpy(out_buf, in_buf, len);

   goto havoc_stage;
   // 最后通过havoc阶段进行变异

 }

在之后再一次运行到此时由于不再满足此if判断，于是结束循环

1 2	if (use_splicing && splice_cycle++ < SPLICE_CYCLES && queued_paths > 1 && queue_cur->len > 1)

最终清理资源，并结束fuzz_one的运行


  ret_val = 0;

abandon_entry:

  splicing_with = -1;

  /* Update pending_not_fuzzed count if we made it through the calibration
     cycle and have not seen this entry before. */

  if (!stop_soon && !queue_cur->cal_failed && !queue_cur->was_fuzzed) {
    queue_cur->was_fuzzed = 1;
    pending_not_fuzzed--;
    if (queue_cur->favored) pending_favored--;
  }

  munmap(orig_in, queue_cur->len);

  if (in_buf != orig_in) ck_free(in_buf);
  ck_free(out_buf);
  ck_free(eff_map);

  return ret_val;

trim_case | 对于testcase的修剪

trim_case以2的幂次位置为单位进行裁剪, 每次修减后通过run_target 运行, 测试结果是否与原来相同。

最后如果发生了修剪，再更新bitmap_score

static u8 trim_case(char **argv, struct queue_entry *q, u8 *in_buf)
{

  static u8 tmp[64];
  static u8 clean_trace[MAP_SIZE];

  u8 needs_write = 0, fault = 0;
  u32 trim_exec = 0;
  u32 remove_len;
  u32 len_p2;

  /* Although the trimmer will be less useful when variable behavior is
     detected, it will still work to some extent, so we don't check for
     this. */

  if (q->len < 5)
    return 0;

  stage_name = tmp;
  bytes_trim_in += q->len;

  /* Select initial chunk len, starting with large steps. */

  len_p2 = next_p2(q->len);
  // 以2的幂次向上取整
  remove_len = MAX(len_p2 / TRIM_START_STEPS, TRIM_MIN_BYTES);
  
  /* Continue until the number of steps gets too high or the stepover
     gets too small. */

  while (remove_len >= MAX(len_p2 / TRIM_END_STEPS, TRIM_MIN_BYTES))
  {

    u32 remove_pos = remove_len;

    sprintf(tmp, "trim %s/%s", DI(remove_len), DI(remove_len));

    stage_cur = 0;
    stage_max = q->len / remove_len;

    while (remove_pos < q->len)
    {

      u32 trim_avail = MIN(remove_len, q->len - remove_pos);
      u32 cksum;

      write_with_gap(in_buf, q->len, remove_pos, trim_avail);
      // 将修剪后的输入写入outfile
      fault = run_target(argv, exec_tmout);
      // 运行fuzz 
      trim_execs++;

      if (stop_soon || fault == FAULT_ERROR)
        goto abort_trimming;

      /* Note that we don't keep track of crashes or hangs here; maybe TODO? */

      cksum = hash32(trace_bits, MAP_SIZE, HASH_CONST);
      
      /* If the deletion had no impact on the trace, make it permanent. This
         isn't perfect for variable-path inputs, but we're just making a
         best-effort pass, so it's not a big deal if we end up with false
         negatives every now and then. */

      if (cksum == q->exec_cksum)
      {
      // 检查运行时bitmap是否与原来相等
        u32 move_tail = q->len - remove_pos - trim_avail;

        q->len -= trim_avail;
        len_p2 = next_p2(q->len);

        memmove(in_buf + remove_pos, in_buf + remove_pos + trim_avail,
                move_tail);
        // 如果是，则更新testcase的len以及内存中的testcase 
        /* Let's save a clean trace, which will be needed by
           update_bitmap_score once we're done with the trimming stuff. */

        if (!needs_write)
        {
        // 如果之前没有设置need_write，设置此标志 
          needs_write = 1;
          memcpy(clean_trace, trace_bits, MAP_SIZE);
          // 保存trace_bits
        }
      }
      else
        remove_pos += remove_len;

      /* Since this can be slow, update the screen every now and then. */

      if (!(trim_exec++ % stats_update_freq))
        show_stats();
      stage_cur++;
    }

    remove_len >>= 1;
  }

  /* If we have made changes to in_buf, we also need to update the on-disk
     version of the test case. */

  if (needs_write)
  {
  // 如果发生了修剪，需要同步到磁盘里保存的testcase, 并且更新bitmap_score
    s32 fd;

    unlink(q->fname); /* ignore errors */

    fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);

    if (fd < 0)
      PFATAL("Unable to create '%s'", q->fname);

    ck_write(fd, in_buf, q->len, q->fname);
    close(fd);

    memcpy(trace_bits, clean_trace, MAP_SIZE);
    update_bitmap_score(q);
  }

abort_trimming:
  bytes_trim_out += q->len;
  return fault;
}

calculate_score | 对于testcase分数的计算

static u32 calculate_score(struct queue_entry *q)
{

  u32 avg_exec_us = total_cal_us / total_cal_cycles;
  u32 avg_bitmap_size = total_bitmap_size / total_bitmap_entries;
  u32 perf_score = 100;

  /* Adjust score based on execution speed of this path, compared to the
     global average. Multiplier ranges from 0.1x to 3x. Fast inputs are
     less expensive to fuzz, so we're giving them more air time. */

  if (q->exec_us * 0.1 > avg_exec_us)
    perf_score = 10;
  else if (q->exec_us * 0.25 > avg_exec_us)
    perf_score = 25;
  else if (q->exec_us * 0.5 > avg_exec_us)
    perf_score = 50;
  else if (q->exec_us * 0.75 > avg_exec_us)
    perf_score = 75;
  else if (q->exec_us * 4 < avg_exec_us)
    perf_score = 300;
  else if (q->exec_us * 3 < avg_exec_us)
    perf_score = 200;
  else if (q->exec_us * 2 < avg_exec_us)
    perf_score = 150;

  /* Adjust score based on bitmap size. The working theory is that better
     coverage translates to better targets. Multiplier from 0.25x to 3x. */

  if (q->bitmap_size * 0.3 > avg_bitmap_size)
    perf_score *= 3;
  else if (q->bitmap_size * 0.5 > avg_bitmap_size)
    perf_score *= 2;
  else if (q->bitmap_size * 0.75 > avg_bitmap_size)
    perf_score *= 1.5;
  else if (q->bitmap_size * 3 < avg_bitmap_size)
    perf_score *= 0.25;
  else if (q->bitmap_size * 2 < avg_bitmap_size)
    perf_score *= 0.5;
  else if (q->bitmap_size * 1.5 < avg_bitmap_size)
    perf_score *= 0.75;

  /* Adjust score based on handicap. Handicap is proportional to how late
     in the game we learned about this path. Latecomers are allowed to run
     for a bit longer until they catch up with the rest. */

  if (q->handicap >= 4)
  {

    perf_score *= 4;
    q->handicap -= 4;
  }
  else if (q->handicap)
  {

    perf_score *= 2;
    q->handicap--;
  }

  /* Final adjustment based on input depth, under the assumption that fuzzing
     deeper test cases is more likely to reveal stuff that can't be
     discovered with traditional fuzzers. */

  switch (q->depth)
  {

  case 0 ... 3:
    break;
  case 4 ... 7:
    perf_score *= 2;
    break;
  case 8 ... 13:
    perf_score *= 3;
    break;
  case 14 ... 25:
    perf_score *= 4;
    break;
  default:
    perf_score *= 5;
  }

  /* Make sure that we don't go over limit. */

  if (perf_score > HAVOC_MAX_MULT * 100)
    perf_score = HAVOC_MAX_MULT * 100;

  return perf_score;
}

common_fuzz_stuff | 一个testcase的运行

在fuzz过程中，用来通知fork_server运行一次测试，并且保存有效的种子

EXP_ST u8 common_fuzz_stuff(char **argv, u8 *out_buf, u32 len)
{

  u8 fault;

  if (post_handler)
  {

    out_buf = post_handler(out_buf, &len);
    // 此handler通常是afl_processers
    if (!out_buf || !len)
      return 0;
  }

  write_to_testcase(out_buf, len);
  // 保存此testcase
  fault = run_target(argv, exec_tmout);
  // 运行一次测试

  // 返回1说明需要快速终止
  if (stop_soon)
    return 1;

  if (fault == FAULT_TMOUT)
  {

    if (subseq_tmouts++ > TMOUT_LIMIT)
    {
      cur_skipped_paths++;
      return 1;
    }
  }
  else
    subseq_tmouts = 0;

  /* Users can hit us with SIGUSR1 to request the current input
     to be abandoned. */

  if (skip_requested)
  {

    skip_requested = 0;
    cur_skipped_paths++;
    return 1;
  }

  /* This handles FAULT_ERROR for us: */

  queued_discovered += save_if_interesting(argv, out_buf, len, fault);
  // 如果存在interesting 的种子，保存起来
  if (!(stage_cur % stats_update_freq) || stage_cur + 1 == stage_max)
    show_stats();

  return 0;
}

2024-03-04 15:47:22 # Fuzz #Fuzz #Coding