类的布局——方法缓存hash表

回顾一下class的结构:

1
2
3
4
5
6
7
8
9
// Class object layout as quoted in this excerpt: it derives from objc_object
// and adds the superclass pointer, the method cache, and the class data bits.
struct objc_class : objc_object {
// Class ISA; // inherited from struct objc_object
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits;
// Forwards to bits.data() and returns the resulting class_rw_t pointer.
class_rw_t *data() const {
return bits.data();
}
};

不难发现,在objc_class结构中,有一个cache_t类型的成员变量cache

其结构如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// One entry of the cache: an implementation pointer (IMP) stored next to the
// selector (SEL) it belongs to, in that order.
struct bucket_t {
private:
IMP _imp;
SEL _sel;
};
// Cache header embedded in objc_class (the `cache` member).
struct cache_t {
private:
// Single packed word; buckets() and mask() shown later in this article
// extract the bucket array pointer and the mask from it.
uintptr_t _bucketsAndMaybeMask;
union {
struct {
uint32_t _unused;
// Value returned by occupied().
uint16_t _occupied;
uint16_t _flags;
};
// Shares its storage with the anonymous struct above (same union).
preopt_cache_t * _originalPreoptCache;
};
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// objc-cache.m
// NOTE(review): the comments below refer to `_maskAndBuckets`; the field shown
// in this excerpt is named `_bucketsAndMaybeMask` — presumably the same word.
// Number of bits the packed word is shifted right by to obtain the mask.
static constexpr uintptr_t maskShift = 48;
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
// With maskShift == 48 this is (1 << 16) - 1.
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
// With the values above this is (1 << 44) - 1.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

// Returns the bucket array pointer: the packed word ANDed with bucketsMask,
// i.e. its low (maskShift - maskZeroBits) bits.
struct bucket_t *cache_t::buckets() const {
uintptr_t addr = _bucketsAndMaybeMask;
return (bucket_t *)(addr & bucketsMask);
}
// Number of hash-table slots in use: returns the _occupied field.
mask_t cache_t::occupied() const {
return _occupied;
}
// Hash-table capacity: mask() + 1, or 0 when mask() is 0.
unsigned cache_t::capacity() const {
return mask() ? mask()+1 : 0;
}
// Returns the mask: the packed word shifted right by maskShift bits.
mask_t cache_t::mask() const {
uintptr_t maskAndBuckets = _bucketsAndMaybeMask;
return maskAndBuckets >> maskShift;
}

通过查看cache_t、bucket_t的结构以及其实现,可以很清晰地看到类的整个缓存表的内容。

接下来用lldb简单验证下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// CacheClass.h
// Demo class declaring three instance methods that take zero, one and two
// arguments; used by the method-cache experiments in this article.
@interface CacheClass : NSObject
- (void)cacheMethodA;
- (void)cacheMethodB:(NSString *)str;
- (void)cacheMethodC:(NSUInteger)integer andString:(NSString *)str;
@end

// CacheClass.m
#import "CacheClass.h"
@implementation CacheClass

/// Logs the name of this method.
- (void)cacheMethodA {
    NSLog(@"%s", __func__);
}

/// Logs `str` followed by the name of this method.
- (void)cacheMethodB:(NSString *)str {
    NSLog(@"%@===>%s", str, __func__);
}

/// Logs `integer`, `str` and the name of this method.
- (void)cacheMethodC:(NSUInteger)integer andString:(NSString *)str {
    // NSUInteger is not `unsigned long` on every platform, so cast explicitly
    // to keep the argument in agreement with the %lu conversion.
    NSLog(@"%lu====>%@===>%s", (unsigned long)integer, str, __func__);
}

@end

// Call site
CacheClass *cacheObj = [[CacheClass alloc] init];
[cacheObj cacheMethodA];
NSLog(@"===================="); // set a breakpoint on this line

获取cacheObj的isa:

1
2
3
4
(lldb) x/1gx cacheObj
0x281da0bc0: 0x0100000102c2d101
(lldb) p/x 0x0100000102c2d101 & 0x0000000ffffffff8ULL
(unsigned long long) $4 = 0x0000000102c2d100

读取cache_t结构体:

1
2
(lldb) x/2gx 0x0000000102c2d100+0x10		// 此处+0x10是因为cache是在`objc_class`结构体的第16字节处开始。
0x102c2d110: 0x0001000281f850c0 0x8010000200000000 // 0x0001000281f850c0 为 _bucketsAndMaybeMask的值 0x8010000200000000 为 联合体的值

获取bucket_t数组的首地址:

1
2
3
4
(lldb) p/x ((uintptr_t)1 << (48-4))-1		// 计算 bucketsMask 的值
(unsigned long) $4 = 0x00000fffffffffff
(lldb) p/x 0x0001000281f850c0 & 0x00000fffffffffff // _bucketsAndMaybeMask & bucketsMask
(long) $5 = 0x0000000281f850c0 // bucket_t数组的首地址

获取bucket_t数组的count:

1
2
(lldb) p/x (0x0001000281f850c0 >> 48) + 1		// 相当于调用cache_t::capacity()函数
(long) $9 = 0x0000000000000002

输出bucket_t数组的内容:

1
2
3
(lldb) x/4gx 0x0000000281f850c0
0x281f850c0: 0x58226481ad686ff0 0x00000001b0870410
0x281f850d0: 0x3c04798102c25d08 0x0000000102c26f97

以{IMP,SEL}的结构验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
(lldb) p (char *)0x00000001b0870410
(char *) $10 = 0x00000001b0870410 "init"
(lldb) dis -a 0x58226481ad686ff0
libobjc.A.dylib`-[NSObject init]:
0x1ad686ff0 <+0>: ret
0x1ad686ff4 <+4>: udf #0x0
0x1ad686ff8 <+8>: udf #0x0
(lldb) p (char *)0x0000000102c26f97
(char *) $11 = 0x0000000102c26f97 "cacheMethodA"
(lldb) dis -a 0x3c04798102c25d08
MethodCacheDemo`-[CacheClass cacheMethodA]:
0x102c25d08 <+0>: sub sp, sp, #0x30
0x102c25d0c <+4>: stp x29, x30, [sp, #0x20]
0x102c25d10 <+8>: add x29, sp, #0x20
0x102c25d14 <+12>: stur x0, [x29, #-0x8]
0x102c25d18 <+16>: str x1, [sp, #0x10]
0x102c25d1c <+20>: mov x9, sp
0x102c25d20 <+24>: adrp x8, 2
0x102c25d24 <+28>: add x8, x8, #0x363 ; "-[CacheClass cacheMethodA]"
0x102c25d28 <+32>: str x8, [x9]
0x102c25d2c <+36>: adrp x0, 3
0x102c25d30 <+40>: add x0, x0, #0x90 ; @"%s"
0x102c25d34 <+44>: bl 0x102c26344 ; symbol stub for: NSLog
0x102c25d38 <+48>: ldp x29, x30, [sp, #0x20]
0x102c25d3c <+52>: add sp, sp, #0x30
0x102c25d40 <+56>: ret

从输出结果可以看出,缓存数组位置是正确的,类CacheClass缓存了两个方法,分别为:-[NSObject init] 和 -[CacheClass cacheMethodA]

用代码获取:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Selects the ISA_MASK value used by _isaForObject to mask the raw isa word.
// Only arm64 builds get a definition; there is no #else branch, so on any
// other architecture ISA_MASK stays undefined and code using it will not compile.
#if __arm64__
#if TARGET_OS_EXCLAVEKIT
#define ISA_MASK 0xfffffffffffffff8ULL
// Builds with pointer authentication, or for the simulator.
#elif __has_feature(ptrauth_calls) || TARGET_OS_SIMULATOR
#define ISA_MASK 0x007ffffffffffff8ULL
// Every other arm64 build.
#else
#define ISA_MASK 0x0000000ffffffff8ULL
#endif
#endif

/// Returns the first pointer-sized word of `obj` with ISA_MASK applied.
///
/// @param obj The object to inspect; may be nil.
/// @return The masked isa word, or 0 when `obj` is nil.
uintptr_t _isaForObject(NSObject *obj) {
    if (obj != nil) {
        // View the object's memory as raw words; word 0 is read as the isa.
        const uintptr_t *words = (const uintptr_t *)(__bridge void *)obj;
        uintptr_t rawIsa = words[0];
        return (uintptr_t)(rawIsa & ISA_MASK);
    }
    return 0;
}

typedef uint32_t mask_t;
// Local copies of the constants used by the runtime's cache_t
const uintptr_t maskShift = 48;
const uintptr_t maskZeroBits = 4;
const uintptr_t maxMask = (((uintptr_t)1 << (64 - maskShift))-1);
const uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1; // 0x100000000000 - 1 = 0xfffffffffff
// Local mirror of a cache entry: implementation pointer first, selector second.
// The field order is what _printMethodCaches relies on when reading buckets.
struct bucket_t {
IMP _imp;
SEL _sel;
};
// Local mirror of the cache header that _printMethodCaches overlays on class
// memory at offset 0x10.
struct cache_t {
// Packed word decoded by buckets() and mask().
uintptr_t _bucketsAndMaybeMask; // 8 bytes
union {
struct {
uint32_t _unused;
// Value returned by occupied().
uint16_t _occupied;
uint16_t _flags;
};
// Shares its storage with the anonymous struct above (same union).
uintptr_t _originalPreoptCache; // 8 bytes
};
};
/// Extracts the bucket array pointer from the packed word of `cache`.
///
/// @param cache The cache header to decode.
/// @return `_bucketsAndMaybeMask & bucketsMask`, viewed as a bucket pointer.
struct bucket_t *buckets(struct cache_t *cache) {
    uintptr_t packed = cache->_bucketsAndMaybeMask;
    uintptr_t address = packed & bucketsMask;
    return (struct bucket_t *)address;
}
/// Extracts the mask from the packed word of `cache`.
///
/// @param cache The cache header to decode.
/// @return `_bucketsAndMaybeMask >> maskShift`, truncated to 32 bits.
uint32_t mask(struct cache_t *cache) {
    uintptr_t packed = cache->_bucketsAndMaybeMask;
    uintptr_t shifted = packed >> maskShift;
    return (uint32_t)shifted;
}
/// Number of slots in the bucket array of `cache`.
///
/// @param cache The cache header to decode.
/// @return `mask(cache) + 1`, or 0 when the mask is 0.
uint32_t capacity(struct cache_t *cache) {
    uint32_t currentMask = mask(cache);
    if (currentMask == 0) {
        return 0;
    }
    return currentMask + 1;
}
/// Number of slots in use, as recorded in the cache header.
///
/// @param cache The cache header to read.
/// @return The `_occupied` field widened to `mask_t`.
mask_t occupied(struct cache_t *cache) {
    uint16_t used = cache->_occupied;
    return (mask_t)used;
}

/// Dumps the method cache of the class that `obj`'s isa points to.
///
/// Prints the table capacity, the occupied count, and then one line per slot
/// with the raw IMP word and the selector name. Empty slots are printed too.
///
/// @param obj The object whose class cache is dumped. nil is tolerated: a short
///            notice is printed instead of reading through a null class pointer.
///
/// NOTE(review): the cache is read without any synchronization with the
/// runtime, and the layout (offset 0x10, packed mask/buckets word) is the one
/// mirrored by the local cache_t/bucket_t structs — confirm it matches the
/// runtime version on the target OS before relying on the output.
void _printMethodCaches(id obj) {
    printf("============================\n");

    uintptr_t isa = _isaForObject(obj);
    if (isa == 0) {
        // _isaForObject returns 0 for nil; without this guard the code below
        // would dereference address 0x10 and crash.
        printf("对象为nil,无方法缓存可输出\n");
        printf("============================\n");
        return;
    }

    // The cache header starts 0x10 bytes into the class object.
    struct cache_t *cache = (struct cache_t *)(isa + 0x10);

    // Start of the bucket array.
    struct bucket_t *bucket_array = buckets(cache);

    // Total number of slots.
    uint32_t count = capacity(cache);

    // Number of slots currently in use.
    uint32_t occupied_count = occupied(cache);

    printf("哈希表容积:%u\t\t\t已缓存方法数:%u\n", count, occupied_count);

    // Print every slot. The index is unsigned to match `count`, and the IMP is
    // converted to unsigned long so the argument agrees with %lx.
    for (uint32_t index = 0; index < count; index++) {
        const struct bucket_t *bucket = &bucket_array[index];
        printf("imp->sel:0x%lx->%s\n",
               (unsigned long)(uintptr_t)bucket->_imp,
               sel_getName(bucket->_sel));
    }
    printf("============================\n");
}

// Call site: create an instance, call one method, then dump the class cache.
CacheClass *cacheObj = [[CacheClass alloc] init];
[cacheObj cacheMethodA];
_printMethodCaches(cacheObj);

// 输出
============================
哈希表容积:2 已缓存方法数:2
imp->sel:0x4f5fd201ad686ff0->init
imp->sel:0xe9344e810494dc50->cacheMethodA
============================

需要注意的是,缓存哈希表有一个扩容的过程,当缓存方法超过了哈希表容积时,就会触发扩容,此时,之前的缓存并不会被复制到新的hash表中,而是重新开始缓存!

例如上面的调用修改为如下:

1
2
3
4
5
// Same call site as before, with one more method call before the dump.
CacheClass *cacheObj = [[CacheClass alloc] init];
[cacheObj cacheMethodA];
// Additional call compared with the previous example.
[cacheObj cacheMethodB:@"B"];

_printMethodCaches(cacheObj);

则输出为:

1
2
3
4
5
6
7
============================
哈希表容积:4 已缓存方法数:1
imp->sel:0x0-><null selector>
imp->sel:0x0-><null selector>
imp->sel:0xf85bf08100269c6c->cacheMethodB:
imp->sel:0x0-><null selector>
============================

从上面的输出可以看出,调用方法-[CacheClass cacheMethodB:]时,触发了缓存表扩容;扩容过程中,它舍弃了原缓存表中的方法,仅缓存了当前方法(-[CacheClass cacheMethodB:])。