以几个主要的经典版本为基础

另外只关注实现部分,安全检查部分见对应文章

glibc2.23

2.23是一个十分经典的版本,以其作为基础

malloc

__libc_malloc

void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;

void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));

arena_get (ar_ptr, bytes);

victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}

if (ar_ptr != NULL)
(void) mutex_unlock (&ar_ptr->mutex);

assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
}
  1. 首先判断__malloc_hook符号是否为NULL,如果不为NULL则调用hook函数
  2. 获得arena
  3. 调用__int_malloc获得victim
  4. 如果victim为NULL且arena不为NULL,则重试:调用arena_get_retry重新获取arena,并再次调用_int_malloc
  5. 如果arena不为空,解锁arena的互斥锁
  6. assert依次检查:victim为NULL、victim是mmap分配的chunk、arena与victim所属的arena匹配,三者必须至少满足其一,否则abort

__int_malloc

初始工作

 checked_request2size (bytes, nb);

/* There are no usable arenas. Fall back to sysmalloc to get a chunk from
mmap. */
if (__glibc_unlikely (av == NULL))
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
#define REQUEST_OUT_OF_RANGE(req)                                 \
((unsigned long) (req) >= \
(unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))

/* pad request bytes into a usable size -- internal version */

#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* Same, except also perform argument check */

#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);

检查申请大小是否合规并转化为真正的chunksize

虽然代码上是先检查申请大小是否合规再将其转化为chunksize,但真正运行中更多时候顺序是反过来的
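举个换算的例子:下面是一个可独立编译的小程序(仅作示意,并非glibc源码;按64位假设SIZE_SZ=8、MALLOC_ALIGNMENT=16,并把MINSIZE简化取为4*SIZE_SZ=0x20),打印几个常见申请大小对应的chunk size:

/* 示意:64位下request2size的换算结果,宏按上文抄写,MINSIZE为简化假设 */
#include <stdio.h>

#define SIZE_SZ            sizeof (size_t)
#define MALLOC_ALIGN_MASK  (2 * SIZE_SZ - 1)
#define MINSIZE            (4 * SIZE_SZ)

#define request2size(req)                                   \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ?        \
   MINSIZE :                                                \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

int main (void)
{
  size_t reqs[] = { 0, 0x18, 0x20, 0x28 };
  for (size_t i = 0; i < sizeof reqs / sizeof reqs[0]; i++)
    printf ("request 0x%zx -> chunk size 0x%zx\n",
            reqs[i], (size_t) request2size (reqs[i]));
  return 0;
}

输出依次为0x20、0x20、0x30、0x30:申请0x18正好可以复用下一个chunk的prev_size字段,得到0x20的chunk;申请0x20则会进位到0x30。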

如果arena是NULL采用mmap分配并返回

fastbin(FILO)

if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb); //计算出索引
mfastbinptr *fb = &fastbin (av, idx);//获得单链表头节点
mchunkptr pp = *fb;//获得第一个chunk
do
{
victim = pp;
if (victim == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
!= victim);
if (victim != 0)
{
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))//检查
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
check_remalloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);//chunk2mem转换地址
alloc_perturb (p, bytes);
return p;//返回
}
}

通过宏fastbin_index(sz)得到对应的fastbin索引

#define fastbin_index(sz) \
((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
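以64位(SIZE_SZ==8)为例:chunk size 0x20对应索引0,0x30对应索引1,以此类推;下面的小片段(示意代码,非glibc源码)可以快速验证:

/* 示意:64位下fastbin_index的取值 */
#include <stdio.h>

#define SIZE_SZ sizeof (size_t)
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

int main (void)
{
  for (unsigned int sz = 0x20; sz <= 0x80; sz += 0x10)
    printf ("chunk size 0x%x -> fastbin index %u\n", sz, fastbin_index (sz));
  return 0;   /* 0x20->0, 0x30->1, ..., 0x80->6 */
}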

然后拿到该索引链的地址

victim赋值为该链的第一个chunk

do
{
victim = pp;
if (victim == NULL)
break;
}
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
!= victim);

其中用到了一些原子操作

#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
({ __typeof (mem) __gmemp = (mem); \
__typeof (*mem) __gret = *__gmemp; \
__typeof (*mem) __gnewval = (newval); \
\
if (__gret == (oldval)) \
*__gmemp = __gnewval; \
__gret; })

就是读取mem指向的值,若它等于oldval则把mem处的值改成newval;无论是否相等,返回的都是mem原先的值(交换成功时这个返回值就等于oldval)
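下面用C11的stdatomic写一个语义等价的小示例(仅作示意,并非glibc实现),演示这种“读头节点、CAS摘链、失败则重试”的无锁出链套路:

#include <stdatomic.h>
#include <stdio.h>

struct node { struct node *fd; int val; };

static _Atomic (struct node *) list_head;

/* 与fastbin取出victim的逻辑相同:反复读头节点,CAS成功才算真正摘下 */
static struct node *
pop (void)
{
  struct node *victim = atomic_load (&list_head);
  while (victim != NULL
         && !atomic_compare_exchange_weak (&list_head, &victim, victim->fd))
    ;   /* CAS失败时victim已被更新为最新的头节点,继续重试 */
  return victim;
}

int main (void)
{
  struct node b = { NULL, 2 }, a = { &b, 1 };
  atomic_store (&list_head, &a);
  printf ("%d\n", pop ()->val);   /* 1 */
  printf ("%d\n", pop ()->val);   /* 2 */
  return 0;
}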

如果victim不为NULL,检查其大小计算得出的索引是否符合当前索引链

最后有个检查调用check_remalloced_chunk (av, victim, nb);

但由于默认编译时没有定义MALLOC_DEBUG,这个宏一般展开为空

smallbin(FIFO)

 if (in_smallbin_range (nb))
{
idx = smallbin_index (nb);//计算索引
bin = bin_at (av, idx);//初始化链表头

if ((victim = last (bin)) != bin)//链不为空才进入
{
if (victim == 0) /* initialization check */
malloc_consolidate (av);
else
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
{
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);//设置下一个chunk的p位
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;//设置A位
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;//返回
}
}
}

如果当前链不为空才进入分配

  1. 若victim为0,说明是bins尚未初始化造成的“链不为空”假象,此时触发malloc_consolidate (av)完成初始化
  2. 否则按FIFO从链尾取出victim,设置inuse等标志位后返回

largebin(FIFO)

else
{
idx = largebin_index (nb);
if (have_fastchunks (av))
malloc_consolidate (av);
}

可以看到,申请largebin时,并不会直接进入largebin中寻找,而是会:

  1. 判断是否有fastchunk,是则触发malloc_consolidate (av);
  2. 进入unsortedbin遍历循环,找到则返回
  3. 进入largebin分配

大循环

接下来几个部分,都是这个大循环的一部分

unsortedbin(FIFO)遍历

      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);

/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/

if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&//当nb在smallbin范围内,且unsortedbin中只有一个chunk且它就是last_remainder时
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))
{
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

/* Take now instead of binning if exact fit */

if (size == nb)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes); //刚好相等
return p;
}

/* place chunk in bin */

if (in_smallbin_range (size))
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd; //放入smallbin
}
else//放入largebin
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck)
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert ((bck->bk->size & NON_MAIN_ARENA) == 0);
if ((unsigned long) (size) < (unsigned long) (bck->bk->size))//小于最小的
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
}
else
{
assert ((fwd->size & NON_MAIN_ARENA) == 0);//从大的开始遍历
while ((unsigned long) size < fwd->size)
{
fwd = fwd->fd_nextsize;
assert ((fwd->size & NON_MAIN_ARENA) == 0);
}

if ((unsigned long) size == (unsigned long) fwd->size)
/* Always insert in the second position. */
fwd = fwd->fd;//永远插在第一个相同大小的后一个位置,不理会nextsize链
else
{
victim->fd_nextsize = fwd;//fd_nextsize指向最近的更小的chunk
victim->bk_nextsize = fwd->bk_nextsize;//fwd肯定在nextsize链,尾除外
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;//上一个设置为nextsize链上的
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;//largebin为空
}

mark_bin (av, victim_index);//标记binmap
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;//链条完整

#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)//最多循环10000次
break;
}

unsortedbin遍历只有在处理满10000个chunk,或者unsortedbin已被清空时才会结束

  1. 对victim的size进行检查
  2. 当申请大小处于smallbin范围 && victim是unsortedbin中唯一的chunk && victim就是last_remainder && 其size大于申请大小+MINSIZE时,切割last_remainder进行分配
  3. 将victim移除unsorted
  4. 如果size刚好则直接返回
  5. 否则进行进入bin的成链准备
  6. 标记binmap,并把victim彻底链入对应的bin;整个_int_malloc中也只有unsorted遍历这一处会调用mark_bin

遍历后largebin分配

   if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);

/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin &&//不为空
(unsigned long) (victim->size) >= (unsigned long) (nb))//最大的大于需要的才进入
{
victim = victim->bk_nextsize;//从小的开始找
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin) && victim->size == victim->fd->size)
victim = victim->fd;//尽量不取出nextsize链上的chunk

remainder_size = size - nb;
unlink (av, victim, bck, fwd);//unlink会设置victim的nextsize链

/* Exhaust */
if (remainder_size < MINSIZE)//这种情况连多余部分一起分配
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));//设置取出部分
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);//设置remainder
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;//返回
}
}

如果对应链不为空且最大的chunk大于申请大小才进入分配

  1. 从最小的开始找起,直到找到第一个比所需大小更大的
  2. 如果有相同大小的chunk,尽量不拿取nextsize链上的
  3. unlink
  4. 正常流程切割:若remainder_size小于MINSIZE,则把剩余部分连同victim一起分配出去;否则切割出remainder,把它插入unsortedbin,并设置好victim与remainder的size、prev_size等头部信息

binmap情况

   ++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);

for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);

bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)//链为空
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}

else
{
size = chunksize (victim);

/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));

remainder_size = size - nb;

/* unlink */
unlink (av, victim, bck, fwd);//unlink

/* Exhaust */
if (remainder_size < MINSIZE)//多余部分一起分配
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}

/* Split */
else
{
remainder = chunk_at_offset (victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;//链接进入unsorted
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;//申请大小在smallbin范围,则将剩余部分置为last_remainer
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;//nextsize链
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
  1. 开头先++idx:走到这一步说明大小刚好合适的bin里肯定没有可用chunk,因此从比当前idx更大的bin找起
  2. 获得下一个bin、其所在的block、对应的map(一个unsigned int),以及bit位(bit是一个整数,32个比特位中只有对应的那一位为1,相关宏定义见本列表之后的示例)
  3. 进入循环
    1. 若当前map中没有足够大的chunk(bit > map),或者bit在上一次循环中移出了边界(bit == 0),则向后寻找下一个非空的map;此处有两种出路:一是block已超出BINMAPSIZE,直接跳往use_top,二是找到了存在足够大chunk的map,此时将bin调整为该map对应的第一个bin、bit置1,继续后面的流程(后面的流程一旦执行,就一定能找到合适的chunk)
    2. 若当前bit没有对应的非空链,bit左移一位,bin移向下一个,直到找到第一个有对应chunk的链
    3. 取victim为该链的最后一个chunk;若链实际为空(与binmap标志位不符),则清除当前标志位,bin移向下一个,bit左移一位,进入下一次循环
    4. 若不为空,则进行正常的切割流程
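上面第2步里block、bit的换算来自下面这些宏(2.23源码中的相关定义大致如下,binmap是4个32位的unsigned int,BINMAPSHIFT为5):

#define BINMAPSHIFT      5
#define BITSPERMAP       (1U << BINMAPSHIFT)
#define BINMAPSIZE       (NBINS / BITSPERMAP)

#define idx2block(i)     ((i) >> BINMAPSHIFT)
#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))

#define mark_bin(m, i)    ((m)->binmap[idx2block (i)] |= idx2bit (i))
#define unmark_bin(m, i)  ((m)->binmap[idx2block (i)] &= ~(idx2bit (i)))
#define get_binmap(m, i)  ((m)->binmap[idx2block (i)] & idx2bit (i))

例如idx为68时,block为68>>5=2,bit为1<<(68&31)=1<<4。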

use_top

use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize (victim);

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (have_fastchunks (av))//再触发consolidate
{
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}
  1. 如果topchunk的size足够分配,那么从topchunk中进行切割
  2. 若不够,如果有fast chunk则进行malloc_consolidate,并再次计算idx,回到大循环起始,一般是申请small chunk进入大循环才有可能会触发这个选项
  3. 若不够,且没有fast chunk,采用sysmalloc

sysmalloc

else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}

free

__libc_free

void
__libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */

void (*hook) (void *, const void *)
= atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0));
return;
}

if (mem == 0) /* free(0) has no effect */
return;

p = mem2chunk (mem);

if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
/* see if the dynamic brk/mmap threshold needs adjusting */
if (!mp_.no_dyn_threshold
&& p->size > mp_.mmap_threshold
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);
return;
}

ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}
libc_hidden_def (__libc_free)
  1. 检查__free_hook,有则调用
  2. free(0)直接返回
  3. 如果是mmap分配的,特殊处理
  4. 调用_int_free (ar_ptr, p, 0);

__int_free

初始操作

if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void) mutex_unlock (&av->mutex);
malloc_printerr (check_action, errstr, chunk2mem (p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
{
errstr = "free(): invalid size";
goto errout;
}

check_inuse_chunk(av, p);
  1. 如果chunk地址非法或者不对齐,报错
  2. 如果size小于minsize或者不对齐,报错
  3. check_inuse_chunk(av, p);检查

fastbin

  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {

if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
/* We might not have a lock at this point and concurrent modifications
of system_mem might have let to a false positive. Redo the test
after getting the lock. */
if (have_lock
|| ({ assert (locked == 0);
mutex_lock(&av->mutex);
locked = 1;
chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem;
}))
{
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (! have_lock)
{
(void)mutex_unlock(&av->mutex);
locked = 0;
}
}

free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

set_fastchunks(av);
unsigned int idx = fastbin_index(size);//计算索引
fb = &fastbin (av, idx);//初始化

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do
{
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}

这基本就是malloc从fastbin取出时的反向操作:计算索引后,用原子操作把p挂到对应fastbin的链表头;期间检查原链表头是否就是p(最朴素的double free检测),以及在持锁的情况下检查原链表头chunk按大小计算出的索引是否与当前fastbin一致

smallbin&&largebin

  /*
Consolidate other non-mmapped chunks as they arrive.
*/

else if (!chunk_is_mmapped(p)) {
if (! have_lock) {
(void)mutex_lock(&av->mutex);
locked = 1;
}

nextchunk = chunk_at_offset(p, size);

/* Lightweight tests: check whether the block is already the
top block. */
if (__glibc_unlikely (p == av->top))
{
errstr = "double free or corruption (top)";
goto errout;
}
/* Or whether the next chunk is beyond the boundaries of the arena. */
if (__builtin_expect (contiguous (av)
&& (char *) nextchunk
>= ((char *) av->top + chunksize(av->top)), 0))
{
errstr = "double free or corruption (out)";
goto errout;
}
/* Or whether the block is actually not marked used. */
if (__glibc_unlikely (!prev_inuse(nextchunk)))
{
errstr = "double free or corruption (!prev)";
goto errout;
}

nextsize = chunksize(nextchunk);
if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (nextsize >= av->system_mem, 0))
{
errstr = "free(): invalid next size (normal)";
goto errout;
}
//进行了一系列的检测
free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

/* consolidate backward */
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

/* consolidate forward */
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);//清除p位

/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/

bck = unsorted_chunks(av);// 统一放入unsortedbin
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;//largebin清空nextsize链
}
bck->fd = p;
fwd->bk = p;//fd,bk链设置

set_head(p, size | PREV_INUSE);//设置p位
set_foot(p, size);//设置下一个chunk的prev_size

check_free_chunk(av, p);
}

/*
If the chunk borders the current high end of memory,
consolidate into top
*/

else {//nextchunk是topchunk的情况
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}

/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.

Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/

if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av))
malloc_consolidate(av);//触发malloc_consolidate

if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))//收缩堆
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));

assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}

if (! have_lock) {
assert (locked);
(void)mutex_unlock(&av->mutex);
}
}
  1. 伴随一系列检查,先尝试向低地址方向(前一个chunk)合并,再尝试向高地址方向(后一个chunk或top chunk)合并
  2. 合并后的chunk的size若大于FASTBIN_CONSOLIDATION_THRESHOLD
    1. 如果有fast chunk则触发malloc_consolidate
    2. 若av为main_arena且top chunk的size不小于mp_.trim_threshold,调用systrim收缩堆;若av不是main_arena,则调用heap_trim尝试收缩对应的heap

附属

#define unlink(AV, P, BK, FD) {                                            \
FD = P->fd; \
BK = P->bk; \
if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) \
malloc_printerr (check_action, "corrupted double-linked list", P, AV); \
else { \
FD->bk = BK; \
BK->fd = FD; \
if (!in_smallbin_range (P->size) \
&& __builtin_expect (P->fd_nextsize != NULL, 0)) { \
if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \
|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
malloc_printerr (check_action, \
"corrupted double-linked list (not small)", \
P, AV); \
if (FD->fd_nextsize == NULL) { \
if (P->fd_nextsize == P) \
FD->fd_nextsize = FD->bk_nextsize = FD; \
else { \
FD->fd_nextsize = P->fd_nextsize; \
FD->bk_nextsize = P->bk_nextsize; \
P->fd_nextsize->bk_nextsize = FD; \
P->bk_nextsize->fd_nextsize = FD; \
} \
} else { \
P->fd_nextsize->bk_nextsize = P->bk_nextsize; \
P->bk_nextsize->fd_nextsize = P->fd_nextsize; \
} \
} \
} \
}

脱链操作

malloc_consolidate

static void malloc_consolidate(mstate av)
{
mfastbinptr* fb; /* current fastbin being consolidated */
mfastbinptr* maxfb; /* last fastbin (for loop control) */
mchunkptr p; /* current chunk being consolidated */
mchunkptr nextp; /* next chunk to consolidate */
mchunkptr unsorted_bin; /* bin header */
mchunkptr first_unsorted; /* chunk to link to */

/* These have same use as in free() */
mchunkptr nextchunk;
INTERNAL_SIZE_T size;
INTERNAL_SIZE_T nextsize;
INTERNAL_SIZE_T prevsize;
int nextinuse;
mchunkptr bck;
mchunkptr fwd;

/*
If max_fast is 0, we know that av hasn't
yet been initialized, in which case do so below
*/

if (get_max_fast () != 0) {
clear_fastchunks(av);

unsorted_bin = unsorted_chunks(av);

/*
Remove each chunk from fast bin and consolidate it, placing it
then in unsorted bin. Among other reasons for doing this,
placing in unsorted bin avoids needing to calculate actual bins
until malloc is sure that chunks aren't immediately going to be
reused anyway.
*/

maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, 0);
if (p != 0) {
do {
check_inuse_chunk(av, p);
nextp = p->fd;

/* Slightly streamlined version of consolidation code in free() */
size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);

if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top) {
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

if (!nextinuse) {
size += nextsize;
unlink(av, nextchunk, bck, fwd);
} else
clear_inuse_bit_at_offset(nextchunk, 0);

first_unsorted = unsorted_bin->fd;
unsorted_bin->fd = p;
first_unsorted->bk = p;

if (!in_smallbin_range (size)) {
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}

set_head(p, size | PREV_INUSE);
p->bk = unsorted_bin;
p->fd = first_unsorted;
set_foot(p, size);
}

else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
}

} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
}
else {
malloc_init_state(av);
check_malloc_state(av);
}
}

将fastbin中的chunk逐个取出遍历

  1. 判断prev_chunk是否可合并
  2. 判断next_chunk是不是top_chunk
    1. 若不是则判断next_chunk是否在使用,不在使用则合并,在使用则清空next_chunk的prev_inuse位
    2. 若是则直接并到top_chunk

可以看到与正常从fastbin中取出相比其中几乎没有检查

glibc2.27

变化比较大的就是2.26的时候多了个tcache

malloc

__libc_malloc

#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
checked_request2size (bytes, tbytes);
size_t tc_idx = csize2tidx (tbytes);

MAYBE_INIT_TCACHE (); // 优先判断tcache是否初始化

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins
/*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
&& tcache
&& tcache->entries[tc_idx] != NULL)
{
return tcache_get (tc_idx);
}
DIAG_POP_NEEDS_COMMENT;
#endif

if (SINGLE_THREAD_P)
{
victim = _int_malloc (&main_arena, bytes);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
}

在_libc_malloc中添加了tcache操作,可以直接在_libc_malloc获得chunk返回

并且多了SINGLE_THREAD_P的判断分支

__int_malloc

fastbin

#define REMOVE_FB(fb, victim, pp)			\
do \
{ \
victim = pp; \
if (victim == NULL) \
break; \
} \
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
!= victim); \

if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{
idx = fastbin_index (nb);
mfastbinptr *fb = &fastbin (av, idx);
mchunkptr pp;
victim = *fb;

if (victim != NULL)
{
if (SINGLE_THREAD_P)
*fb = victim->fd;
else
REMOVE_FB (fb, pp, victim);
if (__glibc_likely (victim != NULL))
{
size_t victim_idx = fastbin_index (chunksize (victim));
if (__builtin_expect (victim_idx != idx, 0))
malloc_printerr ("malloc(): memory corruption (fast)");
check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (SINGLE_THREAD_P)
*fb = tc_victim->fd;
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

fastbin取出发生较大变化

  1. 之前的fastbin移除操作,现在分为两种情况
    • 如果是SINGLE_THREAD_P模式,直接*fb = victim->fd;最简单的单链表移除
    • 之前的常规fastbin移除操作,现在采取REMOVE_FB宏来表示
  2. 新增fastbin填充tcache机制,当从fastbin中取出chunk后,如果该fastbin中还有剩余chunk,且对应tcache中有剩余空间,则会将fastbin中的chunk移入tcachebin

smallbin

if ((victim = last (bin)) != bin)
{
- if (victim == 0) /* initialization check */
- malloc_consolidate (av);

去除bins未初始化则触发malloc_consolidate的机制

#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = last (bin)) != bin)
{
if (tc_victim != 0)
{
bck = tc_victim->bk;
set_inuse_bit_at_offset (tc_victim, nb);
if (av != &main_arena)
set_non_main_arena (tc_victim);
bin->bk = bck;
bck->fd = bin;

tcache_put (tc_victim, tc_idx);
}
}
}
#endif

smallbin同样新增tcache填充机制,当从smallbin中取出chunk后,如果该smallbin中还有剩余chunk,且对应tcache中有剩余空间,则会将smallbin中的chunk移入tcachebin

注意填充过程中是没有对smallbin的完整性进行检查的

大循环

#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;

tcache_unsorted_count = 0;
#endif

增加了一些参数的布置

unsortedbin遍历

#if USE_TCACHE
/* Fill cache first, return to user only if cache fills.
We may return one of these chunks later. */
if (tcache_nb
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (victim, tc_idx);
return_cached = 1;
continue;
}
else
{
#endif

遍历时若找到大小刚好满足需求的chunk A:如果对应tcache中还有剩余空间,则先把A放入tcache并继续遍历;如果没有剩余空间,则直接返回A

#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while
filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
return tcache_get (tc_idx);
}
#endif

每处理完一个unsorted chunk后,都会判断是否已经找到过满足需求的chunk(return_cached),如果有且同时满足:

mp_.tcache_unsorted_limit大于0,且已遍历的unsorted chunk数量超过mp_.tcache_unsorted_limit

则会直接从tcache中取出chunk,中断unsorted遍历并返回

在调试时发现一般情况下mp_.tcache_unsorted_limit==0,也就是上述情况不满足,使得unsorted遍历会正常完成

然后直到unsorted遍历完成后,才会判断是否有找到满足需求的chunk,并从tcache返回

free

__libc_free

__libc_free中并没有实质性的变化

MAYBE_INIT_TCACHE()

新增判断tcache是否初始化

__int_free

tcache

#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);

if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif

释放时优先放入tcachebin,优先度高于各种合并(包括与top_chunk合并)


fastbin

fastbin放入的操作发生了与malloc时类似的变化

附属

malloc_state

/* Set if the fastbin chunks contain recently inserted free blocks.  */
/* Note this is a bool but not all targets support atomics on booleans. */
int have_fastchunks;

malloc_state中新增了have_fastchunks字段,main_arena的布局随之变化,利用unsortedbin泄露libc地址时的偏移需要做相应调整;该字段虽然是int类型,但由于后续成员的对齐要求,实际占用了8字节

malloc_consolidate

{
unsigned int idx = fastbin_index (chunksize (p));
if ((&fastbin (av, idx)) != fb)
malloc_printerr ("malloc_consolidate(): invalid chunk size");
}

新增判断fastbin取出的chunk的大小是否符合当前fastbin链

- if (get_max_fast () != 0) {  
........
........
- }
- else {
- malloc_init_state(av);
- check_malloc_state(av);
- }

去除了get_max_fast () != 0的判断以及未初始化时调用malloc_init_state的分支,malloc_consolidate不再承担初始化arena的工作

tcache

tcache结构

#if USE_TCACHE

/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache. */
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the
per-thread cache (hence "tcache_perthread_struct"). Keeping
overall size low is mildly important. Note that COUNTS and ENTRIES
are redundant (we could have just counted the linked list each
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];//# define TCACHE_MAX_BINS 64
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;

/* Caller must ensure that we know tc_idx is valid and there's room
for more chunks. */
static __always_inline void

注意:tcache_entry的next字段指向的是chunk的mem区域,而非malloc_chunk头
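也就是说,tcache_entry直接覆盖在chunk2mem(p)的位置,即chunk头之后2*SIZE_SZ处;下面的小片段(示意代码,非glibc源码)验证这个偏移:

#include <stdio.h>

#define SIZE_SZ sizeof (size_t)
#define chunk2mem(p) ((void *) ((char *) (p) + 2 * SIZE_SZ))

int main (void)
{
  unsigned char fake_chunk[0x20];                 /* 模拟一个chunk */
  void *mem = chunk2mem (fake_chunk);             /* tcache_entry就落在这里 */
  printf ("mem - chunk = %td 字节\n", (char *) mem - (char *) fake_chunk);   /* 64位下为16 */
  return 0;
}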

MAYBE_INIT_TCACHE()

# define MAYBE_INIT_TCACHE() \
if (__glibc_unlikely (tcache == NULL)) \
tcache_init();

判断是否初始化tcache

tcache_init

static void
tcache_init(void)
{
mstate ar_ptr;
void *victim = 0;
const size_t bytes = sizeof (tcache_perthread_struct);

if (tcache_shutting_down)//不适用tcache
return;

arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
if (!victim && ar_ptr != NULL)
{
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}


if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);

/* In a low memory situation, we may not be able to allocate memory
- in which case, we just keep trying later. However, we
typically do this very early, so either there is sufficient
memory, or there isn't enough memory to do non-trivial
allocations anyway. */
if (victim)
{
tcache = (tcache_perthread_struct *) victim;//将tcache_perthread_struct的地址赋值给tcache
memset (tcache, 0, sizeof (tcache_perthread_struct));
}

}
  1. 首先在堆上分配一个sizeof (tcache_perthread_struct)+0x10大小的chunk:在2.27中counts是char类型,对应64*1+64*8+0x10=0x250;后来(印象中是2.30起)counts改为uint16_t,对应64*2+64*8+0x10=0x290,具体可参考列表后的小例子
  2. 将其中的内存全部置零
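下面这个小片段(示意代码,非glibc源码,按两种counts类型手工复刻结构体)可以算出上面提到的两种大小:

#include <stdio.h>
#include <stdint.h>

#define TCACHE_MAX_BINS 64

/* 2.27~2.29:counts为char,每项1字节 */
typedef struct { char counts[TCACHE_MAX_BINS]; void *entries[TCACHE_MAX_BINS]; } tcache_like_227;
/* 约2.30起:counts改为uint16_t,每项2字节 */
typedef struct { uint16_t counts[TCACHE_MAX_BINS]; void *entries[TCACHE_MAX_BINS]; } tcache_like_230;

int main (void)
{
  /* 加上0x10的chunk头,对应正文中的0x250与0x290 */
  printf ("char counts:     struct=%#zx chunk=%#zx\n",
          sizeof (tcache_like_227), sizeof (tcache_like_227) + 0x10);
  printf ("uint16_t counts: struct=%#zx chunk=%#zx\n",
          sizeof (tcache_like_230), sizeof (tcache_like_230) + 0x10);
  return 0;
}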

tcache_put

static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

tcache_get

static __always_inline void *
tcache_get (size_t tc_idx)
{
tcache_entry *e = tcache->entries[tc_idx];
assert (tc_idx < TCACHE_MAX_BINS);
assert (tcache->entries[tc_idx] > 0);
tcache->entries[tc_idx] = e->next;
--(tcache->counts[tc_idx]);
return (void *) e;
}

tcache_shutdown

static void
tcache_thread_shutdown (void)
{
int i;
tcache_perthread_struct *tcache_tmp = tcache;

if (!tcache)
return;

/* Disable the tcache and prevent it from being reinitialized. */
tcache = NULL;
tcache_shutting_down = true;

/* Free all of the entries and the tcache itself back to the arena
heap for coalescing. */
for (i = 0; i < TCACHE_MAX_BINS; ++i)
{
while (tcache_tmp->entries[i])
{
tcache_entry *e = tcache_tmp->entries[i];
tcache_tmp->entries[i] = e->next;
__libc_free (e);
}
}

__libc_free (tcache_tmp);
}

基本不会用到这个函数

glibc2.29

malloc

__libc_malloc

无显著变化

__int_malloc

无显著变化

free

__libc_free

无显著变化

__int_free

tcache

#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);
if (tcache != NULL && tc_idx < mp_.tcache_bins)
{
/* Check to see if it's already in the tcache. */
tcache_entry *e = (tcache_entry *) chunk2mem (p);

/* This test succeeds on double free. However, we don't 100%
trust it (it also matches random payload data at a 1 in
2^<size_t> chance), so verify it's not an unlikely
coincidence before aborting. */
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = tmp->next)
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}

if (tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
}
#endif

在优先放入tcache时,会判断chunk的key字段是否为tcache,如果是:

则遍历该tcache链中的所有chunk,判断是否存在double free

附属

tcache

tcache结构

typedef struct tcache_entry
{
struct tcache_entry *next;
/* This field exists to detect double frees. */
struct tcache_perthread_struct *key;
} tcache_entry;

tcache_entry成员多了个key,用于验证double free

tcache_put

e->key = tcache;

新增一句将key字段赋值为tcache

tcache_get

e->key = NULL;

新增一句将key字段置空

宏unlink现在由函数unlink_chunk实现

但内部具体代码并无明显变化

static void
unlink_chunk (mstate av, mchunkptr p)
{
if (chunksize (p) != prev_size (next_chunk (p)))
malloc_printerr ("corrupted size vs. prev_size");

mchunkptr fd = p->fd;
mchunkptr bk = p->bk;

if (__builtin_expect (fd->bk != p || bk->fd != p, 0))
malloc_printerr ("corrupted double-linked list");

fd->bk = bk;
bk->fd = fd;
if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL)//fd_nextsize为null那么其本身就不在nextsize链
{
if (p->fd_nextsize->bk_nextsize != p
|| p->bk_nextsize->fd_nextsize != p)
malloc_printerr ("corrupted double-linked list (not small)");

if (fd->fd_nextsize == NULL)//fd为bin头时也不会进入;其实这种情况应该不会出现——p在nextsize链上时,p->fd无论如何也不会为NULL,所以这部分代码的作用更像是防止修改到bin头
//前提是确实不会出现:在存在相同大小chunk的情况下,去取出nextsize链上那个chunk的情况
{
if (p->fd_nextsize == p)//nextsize链上只有p(其实整个链也只有p)
fd->fd_nextsize = fd->bk_nextsize = fd;
else
{
fd->fd_nextsize = p->fd_nextsize;
fd->bk_nextsize = p->bk_nextsize;
p->fd_nextsize->bk_nextsize = fd;
p->bk_nextsize->fd_nextsize = fd;
}
}
else
{
p->fd_nextsize->bk_nextsize = p->bk_nextsize;
p->bk_nextsize->fd_nextsize = p->fd_nextsize;//这样指向不会修改到bin头
}
}
}

glibc2.31

malloc

__libc_malloc

无显著变化

__int_malloc

初始工作

#define checked_request2size(req, sz) \
({ \
(sz) = request2size (req); \
if (((sz) < (req)) \
|| REQUEST_OUT_OF_RANGE (sz)) \
{ \
__set_errno (ENOMEM); \
return 0; \
} \
})

static inline bool
checked_request2size (size_t req, size_t *sz) __nonnull (1)
{
if (__glibc_unlikely (req > PTRDIFF_MAX))
return false;
*sz = request2size (req);
return true;
}

宏checked_request2size现在由函数实现

free

__libc_free

无显著变化

__int_free

无显著变化

附属

无显著变化

glibc2.32

malloc

__libc_malloc

无显著变化

__int_malloc

fastbin

pp = REVEAL_PTR (victim->fd);

新增取出时fastbin chunk解密

free

__libc_free

无显著变化

__int_free

tcache

判断tcache double free时需要解密

fastbin

p->fd = PROTECT_PTR (&p->fd, old);

放入fastbin时需加密

附属

tcache&&fastbin加密

/* Safe-Linking:
Use randomness from ASLR (mmap_base) to protect single-linked lists
of Fast-Bins and TCache. That is, mask the "next" pointers of the
lists' chunks, and also perform allocation alignment checks on them.
This mechanism reduces the risk of pointer hijacking, as was done with
Safe-Unlinking in the double-linked lists of Small-Bins.
It assumes a minimum page size of 4096 bytes (12 bits). Systems with
larger pages provide less entropy, although the pointer mangling
still works. */
#define PROTECT_PTR(pos, ptr) \
((__typeof (ptr)) ((((size_t) pos) >> 12) ^ ((size_t) ptr)))
#define REVEAL_PTR(ptr) PROTECT_PTR (&ptr, ptr)

tcache和fastbin新增链表加密

加密的具体方法是:取chunk的fd字段(next字段)的地址右移12位,再与要加密的数据异或,得到结果

解密则是与加密恰好相反
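下面是一个可独立运行的小示例(非glibc源码,依赖GCC/Clang的__typeof扩展),演示这对宏的加解密互为逆运算:

#include <stdio.h>

/* 抄自上文的两个宏 */
#define PROTECT_PTR(pos, ptr) \
  ((__typeof (ptr)) ((((size_t) pos) >> 12) ^ ((size_t) ptr)))
#define REVEAL_PTR(ptr) PROTECT_PTR (&ptr, ptr)

int main (void)
{
  void *slot;             /* 模拟chunk里的fd/next字段 */
  void *target = &slot;   /* 随便取一个真实地址,充当要保存的“下一个chunk”指针 */

  /* 写入时加密:用存放位置自身的地址右移12位,再与目标指针异或 */
  slot = PROTECT_PTR (&slot, target);
  printf ("加密后存放的值: %p\n", slot);

  /* 读出时解密:REVEAL_PTR取的pos与写入时相同,因此能恢复原值 */
  printf ("解密恢复的指针: %p (应等于 %p)\n", REVEAL_PTR (slot), target);
  return 0;
}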


并且可以发现:

对于一条fastbin或tcachebin单链表,从链表头(arena中的fastbinsY或tcache的entries数组)看起,指向第一个chunk的指针是不加密的;从第二个chunk开始,也就是存放在chunk内部的fd/next指针,才会进行加密

tcache

tcache_put

e->next = PROTECT_PTR (&e->next, tcache->entries[tc_idx]);

放入tcache时加密

tcache_get

tcache->entries[tc_idx] = REVEAL_PTR (e->next);

tcache取出时解密

glibc2.33

malloc

__libc_malloc

无显著变化

__int_malloc

无显著变化

free

__libc_free

无显著变化

__int_free

无显著变化

附属

M_TAG

#ifdef USE_MTAG

/* Default implementaions when memory tagging is supported, but disabled. */
static void *
__default_tag_region (void *ptr, size_t size)
{
return ptr;
}

static void *
__default_tag_nop (void *ptr)
{
return ptr;
}

static int __mtag_mmap_flags = 0;
static size_t __mtag_granule_mask = ~(size_t)0;

static void *(*__tag_new_memset)(void *, int, size_t) = memset;
static void *(*__tag_region)(void *, size_t) = __default_tag_region;
static void *(*__tag_new_usable)(void *) = __default_tag_nop;
static void *(*__tag_at)(void *) = __default_tag_nop;

# define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size)
# define TAG_REGION(ptr, size) __tag_region (ptr, size)
# define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr)
# define TAG_AT(ptr) __tag_at (ptr)
#else
# define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size)
# define TAG_REGION(ptr, size) (ptr)
# define TAG_NEW_USABLE(ptr) (ptr)
# define TAG_AT(ptr) (ptr)
#endif

新增M_TAG部分,不过基本为空

glibc2.34

malloc

__libc_malloc

-  void *(*hook) (size_t, const void *)
- = atomic_forced_read (__malloc_hook);
- if (__builtin_expect (hook != NULL, 0))
- return (*hook)(bytes, RETURN_ADDRESS (0));

去除了__malloc_hook符号及调用,其实所有相关hook都被去除了

__int_malloc

无显著变化

free

__libc_free

- void (*hook) (void *, const void *)
- = atomic_forced_read (__free_hook);
- if (__builtin_expect (hook != NULL, 0))
- {
- (*hook)(mem, RETURN_ADDRESS (0));
- return;
- }

去除了__free_hook符号及调用

__int_free

无显著变化

附属

tcache

key验证

以往tcache_entry结构体中用于检测double free的key字段是用tcache(即tcache_perthread_struct的地址)填充,现在改为用tcache_key填充

tcache_key的产生如下


/* Process-wide key to try and catch a double-free in the same thread. */
static uintptr_t tcache_key;

/* The value of tcache_key does not really have to be a cryptographically
secure random number. It only needs to be arbitrary enough so that it does
not collide with values present in applications. If a collision does happen
consistently enough, it could cause a degradation in performance since the
entire list is checked to check if the block indeed has been freed the
second time. The odds of this happening are exceedingly low though, about 1
in 2^wordsize. There is probably a higher chance of the performance
degradation being due to a double free where the first free happened in a
different thread; that's a case this check does not cover. */
static void
tcache_key_initialize (void)
{
if (__getrandom (&tcache_key, sizeof(tcache_key), GRND_NONBLOCK)
!= sizeof (tcache_key))
{
tcache_key = random_bits ();
#if __WORDSIZE == 64
tcache_key = (tcache_key << 32) | random_bits ();
#endif
}
}

M_TAG

#ifdef USE_MTAG
static bool mtag_enabled = false;
static int mtag_mmap_flags = 0;
#else
# define mtag_enabled false
# define mtag_mmap_flags 0
#endif

static __always_inline void *
tag_region (void *ptr, size_t size)
{
if (__glibc_unlikely (mtag_enabled))
return __libc_mtag_tag_region (ptr, size);
return ptr;
}

static __always_inline void *
tag_new_zero_region (void *ptr, size_t size)
{
if (__glibc_unlikely (mtag_enabled))
return __libc_mtag_tag_zero_region (__libc_mtag_new_tag (ptr), size);
return memset (ptr, 0, size);
}

/* Defined later. */
static void *
tag_new_usable (void *ptr);

static __always_inline void *
tag_at (void *ptr)
{
if (__glibc_unlikely (mtag_enabled))
return __libc_mtag_address_get_tag (ptr);
return ptr;
}

M_TAG实装了一部分,但依然没什么用

glibc2.35

malloc

__libc_malloc

无显著变化

__int_malloc

无显著变化

free

__libc_free

无显著变化

__int_free

无显著变化

附属

无显著变化

glibc2.37

malloc

__libc_malloc

无显著变化

__int_malloc

无显著变化

free

__libc_free

无显著变化

__int_free

无显著变化

附属

无显著变化

glibc2.38

malloc

__libc_malloc

无显著变化

__int_malloc

无显著变化

free

__libc_free

无显著变化

__int_free

无显著变化

附属

tcache

tcache_get

static __always_inline void *
tcache_get (size_t tc_idx)
{
return tcache_get_n (tc_idx, & tcache->entries[tc_idx]);
}

tcache_get变为调用tcache_get_n (tc_idx, & tcache->entries[tc_idx])

tcache_get_n的实现如下:

static __always_inline void *
tcache_get_n (size_t tc_idx, tcache_entry **ep)
{
tcache_entry *e;
if (ep == &(tcache->entries[tc_idx]))
e = *ep;
else
e = REVEAL_PTR (*ep);

if (__glibc_unlikely (!aligned_OK (e)))
malloc_printerr ("malloc(): unaligned tcache chunk detected");

if (ep == &(tcache->entries[tc_idx]))
*ep = REVEAL_PTR (e->next);
else
*ep = PROTECT_PTR (ep, REVEAL_PTR (e->next));

--(tcache->counts[tc_idx]);
e->key = 0;
return (void *) e;
}

这样的变化使得tcache可以从链中间取出chunk

不过对于tcache_get来说,实际上并没有什么变化