类的布局——方法列表(1)

对于OC的实例方法,用法是先实例化对象,再用实例对象调用方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// ObjectA.h
@interface ObjectA : NSObject
@property(nonatomic,assign,readonly)BOOL b;
- (void)funcA;
- (void)funcB:(NSString *)str;
@end

// ObjectA.m
#import "ObjectA.h"
@implementation ObjectA
- (void)funcA {
NSLog(@"方法A");
}
- (void)funcB:(NSString *)str {
NSLog(@"方法B:%@", str);
}
@end

// 调用实例方法
ObjectA *aObj = [[ObjectA alloc] init];
[aObj funcA];

和成员变量的使用方式一样,都是先初始化对象,再使用。这就容易让人产生一个误区:实例方法和成员变量一样,每个对象独一份,在对象初始化时存储在堆区。

事实上,实例方法和成员变量有着本质的区别,成员变量是在对象实例化之后,存储在内存的堆区(即对象所在的地址);而实例方法作为可执行部分,编译之后存储在代码段,实例方法的地址(IMP)、方法名等信息则被存储在类对象中。

类对象的定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
struct class_rw_ext_t {
DECLARE_AUTHED_PTR_TEMPLATE(class_ro_t)
class_ro_t_authed_ptr<const class_ro_t> ro;
method_array_t methods;
property_array_t properties;
protocol_array_t protocols;
const char *demangledName;
uint32_t version;
};

struct class_rw_t {
// Be warned that Symbolication knows the layout of this structure.
uint32_t flags;
uint16_t witness;
explicit_atomic<uintptr_t> ro_or_rw_ext;

private:
using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t, class_rw_ext_t, PTRAUTH_STR("class_ro_t"), PTRAUTH_STR("class_rw_ext_t")>;
};
struct class_data_bits_t {
// Values are the FAST_ flags above.
uintptr_t bits;
};

struct objc_class : objc_object {
// Class ISA; // 继承自 struct objc_object
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits;
class_rw_t *data() const {
return bits.data();
}
};

用一个图大概描述一下其内存布局:

用lldb大致验证一下:

初始化对象:

1
2
ObjectA *aObj = [[ObjectA alloc] init];
NSLog(@"=====分割线====="); // 此处下断点

获取对象地址:

1
2
(lldb) po aObj
<ObjectA: 0x282dccdd0>

获取isa指针:

1
2
3
4
(lldb) x/1gx 0x282dccdd0
0x282dccdd0: 0x01000001001f14a9 // 0x01000001001f14a9 为优化后的isa指针,要获取真实的isa指针,需进一步处理
(lldb) p/x 0x01000001001f14a9 & 0x0000000ffffffff8ULL
(unsigned long long) $1 = 0x00000001001f14a8 // 0x00000001001f14a8 为真实的isa指针

获取bits:

1
2
3
4
(lldb) x/5gx 0x00000001001f14a8
0x1001f14a8: 0x00000001001f1480 0x000000020afa07d8
0x1001f14b8: 0x0003000283a88440 0x8018000100000000
0x1001f14c8: 0x8000000282fea4e4 // 0x8000000282fea4e4 即为bits的值

从bits值中获取class_rw_t结构体:

1
2
(lldb) p/x 0x8000000282fea4e4 & 0x0f00007ffffffff8UL
(unsigned long) $2 = 0x0000000282fea4e0 // struct class_rw_t指针

获取class_ro_t结构体:

1
2
3
4
(lldb) x/2gx 0x0000000282fea4e0
0x282fea4e0: 0x0000000080080006 0x00000001001f02c8 // 0x00000001001f02c8 为ro_or_rw_ext联合指针
(lldb) p/x 0x00000001001f02c8 & 0x1
(long) $3 = 0x0000000000000000 // ro_or_rw_ext & 0x1 == 0 ,则 ro_or_rw_ex 的值即为class_ro_t指针

获取baseMethods

1
2
3
4
(lldb) x/5gx 0x00000001001f02c8
0x1001f02c8: 0x0000000800000080 0x0000000000000009
0x1001f02d8: 0x0000000000000000 0x00000001001ef3d3
0x1001f02e8: 0x00000001001f0238 // 该指针即为baseMethods

查看baseMethods的内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// baseMethods为struct method_list_t指针。
// struct method_list_t 结构如下:

template <typename Element, typename List, uint32_t FlagMask, typename PointerModifier = PointerModifierNop>
struct entsize_list_tt {
uint32_t entsizeAndFlags;
uint32_t count;
uint32_t entsize() const {
return entsizeAndFlags & ~FlagMask;
}
uint32_t flags() const {
return entsizeAndFlags & FlagMask;
}

ALWAYS_INLINE
Element& getOrEnd(uint32_t i) const {
ASSERT(i <= count);
uint32_t iBytes;
if (os_mul_overflow(i, entsize(), &iBytes))
_objc_fatal("entsize_list_tt overflow: index %" PRIu32 " in list %p with entsize %" PRIu32,
i, this, entsize());
return *PointerModifier::modify(*(List *)this, (Element *)((uint8_t *)this + sizeof(*this) + iBytes));
}

Element& get(uint32_t i) const {
ASSERT(i < count);
return getOrEnd(i);
}
...
};

struct method_list_t : entsize_list_tt<method_t, method_list_t, 0xffff0003, method_t::pointer_modifier> {
...
};

乍一看,似乎这里并没有存储任何和方法列表相关的信息,但是细看结构体模版entsize_list_ttgetOrEnd实现就会发现,该模版在结构体内部维护了一个Element类型的数组,其实际结构可以理解为:

1
2
3
4
5
struct entsize_list_tt {
uint32_t entsizeAndFlags;
uint32_t count;
Element elements[0];
};

从上面可以看出,该模版作为method_list_t的实现,Element的类型为:method_t

1
2
3
4
5
struct method_t {
SEL name;
const char *types;
MethodListIMP imp;
};

到这里,baseMethods的内容便呼之欲出了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
(lldb) x/15gx 0x00000001001f0238			// 以指针的形式读取15条baseMethods的内容
0x1001f0238: 0x000000030000001b 0x00000001001ef035 //0x000000030000001b 为 entsizeAndFlags 和 count 的值
0x1001f0248: 0x00000001001ef444 0x00000001001edd24
0x1001f0258: 0x00000001001ef03b 0x00000001001ef44c
0x1001f0268: 0x00000001001edd50 0x00000001b04f22e9
0x1001f0278: 0x00000001001ef457 0x00000001001eddb4
0x1001f0288: 0x0000000100000020 0x00000001001f1420
0x1001f0298: 0x00000001001ee619 0x00000001001ef45f
0x1001f02a8: 0x0000000100000000
(lldb) p (SEL)0x00000001001ef035 // 读取第1个method_t的name
(SEL) $11 = "funcA"
(lldb) p (char *)0x00000001001ef444 // 读取第1个method_t的types
(char *) $12 = 0x00000001001ef444 "v16@0:8"
(lldb) dis -a 0x00000001001edd24 // 通过反编译指令验证第1个method_t的imp
ObjectDemo`-[ObjectA funcA]:
0x1001edd24 <+0>: sub sp, sp, #0x20
0x1001edd28 <+4>: stp x29, x30, [sp, #0x10]
0x1001edd2c <+8>: add x29, sp, #0x10
0x1001edd30 <+12>: str x0, [sp, #0x8]
0x1001edd34 <+16>: str x1, [sp]
0x1001edd38 <+20>: adrp x0, 3
0x1001edd3c <+24>: add x0, x0, #0x90 ; @
0x1001edd40 <+28>: bl 0x1001ee3a8 ; symbol stub for: NSLog
0x1001edd44 <+32>: ldp x29, x30, [sp, #0x10]
0x1001edd48 <+36>: add sp, sp, #0x20
0x1001edd4c <+40>: ret
(lldb) p (SEL)0x00000001001ef03b // 读取第2个method_t的name
(SEL) $13 = "funcB:"
(lldb) p (char *)0x00000001001ef44c // 读取第2个method_t的types
(char *) $14 = 0x00000001001ef44c "v24@0:8@16"
(lldb) dis -a 0x00000001001edd50 // 通过反编译指令验证第2个method_t的imp
ObjectDemo`-[ObjectA funcB:]:
0x1001edd50 <+0>: sub sp, sp, #0x40
0x1001edd54 <+4>: stp x29, x30, [sp, #0x30]
0x1001edd58 <+8>: add x29, sp, #0x30
0x1001edd5c <+12>: mov x8, x1
0x1001edd60 <+16>: mov x1, x2
0x1001edd64 <+20>: stur x0, [x29, #-0x8]
0x1001edd68 <+24>: stur x8, [x29, #-0x10]
0x1001edd6c <+28>: add x0, sp, #0x18
0x1001edd70 <+32>: str x0, [sp, #0x8]
0x1001edd74 <+36>: mov x8, #0x0
0x1001edd78 <+40>: str x8, [sp, #0x10]
0x1001edd7c <+44>: str xzr, [sp, #0x18]
0x1001edd80 <+48>: bl 0x1001ee42c ; symbol stub for: objc_storeStrong
0x1001edd84 <+52>: ldr x8, [sp, #0x18]
0x1001edd88 <+56>: mov x9, sp
0x1001edd8c <+60>: str x8, [x9]
0x1001edd90 <+64>: adrp x0, 3
0x1001edd94 <+68>: add x0, x0, #0xb0 ; @
0x1001edd98 <+72>: bl 0x1001ee3a8 ; symbol stub for: NSLog
0x1001edd9c <+76>: ldr x0, [sp, #0x8]
0x1001edda0 <+80>: ldr x1, [sp, #0x10]
0x1001edda4 <+84>: bl 0x1001ee42c ; symbol stub for: objc_storeStrong
0x1001edda8 <+88>: ldp x29, x30, [sp, #0x30]
0x1001eddac <+92>: add sp, sp, #0x40
0x1001eddb0 <+96>: ret

用lldb将方法列表的存储位置过一遍后,整个过程清晰了很多,接下来再用代码走一遍:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// 定义 ISA_MASK 用于获取isa指针
#if __arm64__
#if TARGET_OS_EXCLAVEKIT
#define ISA_MASK 0xfffffffffffffff8ULL
#elif __has_feature(ptrauth_calls) || TARGET_OS_SIMULATOR
#define ISA_MASK 0x007ffffffffffff8ULL
#else
#define ISA_MASK 0x0000000ffffffff8ULL
#endif
#endif

// 定义 FAST_DATA_MASK 用于获取class_rw_t
#if TARGET_OS_EXCLAVEKIT
#define FAST_DATA_MASK 0x0000001ffffffff8UL
#elif TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
#define FAST_DATA_MASK 0x0f00007ffffffff8UL
#else
#define FAST_DATA_MASK 0x0f007ffffffffff8UL
#endif

// 定义方法结构体
struct method_t {
SEL sel;
char *types;
IMP imp;
};

// 定义方法列表结构体
struct method_list_t {
uint32_t entsizeAndFlags;
uint32_t count;
struct method_t elements[0];
};

// 从实例对象中获取isa指针
BytePtr _isaForObject(NSObject *obj) {
if (obj == nil) return NULL;
struct _object {
BytePtr isa;
};
struct _object *obj_ptr = (struct _object *)(__bridge void *)obj;
return (BytePtr)((int64_t)obj_ptr->isa & ISA_MASK);
}

// 从isa指针中获取方法列表和方法数
void _methodsWithIsa(BytePtr isa, struct method_t **methods, uint32_t *count) {
// 获取isa中的bits
uintptr_t bits = *((uintptr_t *)(isa + 8/*isa*/ + 8/*superclass*/ + 16/*cache*/));

// 获取 class_rw_t
uintptr_t class_rw_t = bits & FAST_DATA_MASK;

// 获取 ro_or_rw_ext
uintptr_t ro_or_rw_ext = *((uintptr_t *)(class_rw_t + 0x8));

// 获取class_ro_t
// 判断是class_rw_ext_t 还是 class_ro_t
uintptr_t class_ro_t = ro_or_rw_ext;
if (ro_or_rw_ext & 0x1) {
// class_rw_ext_t
class_ro_t = *((uintptr_t *)ro_or_rw_ext);
}

// 获取class_ro_t 中的 baseMethods
uintptr_t baseMethods = *(uintptr_t *)(class_ro_t + 4/*flags*/ + 4/*instanceStart*/ + 4/*instanceSize*/ + 4/*reserved*/ + 8/*ivarLayout*/ + 8/*name*/);

struct method_list_t *_method_list_t = (struct method_list_t *)baseMethods;
*count = _method_list_t->count;
*methods = _method_list_t->elements;
};

// 输出方法信息
void _printMethods(struct method_t *methods, uint32_t count) {
struct method_t *node = methods;
for (int i=0; i<count; i++) {
printf("=============================\n");
printf("SEL:%s\n", sel_getName(node->sel));
printf("types:%s\n",node->types);
printf("imp:0x%lx\n",(uintptr_t)node->imp);
node++;
}
printf("=============================\n");
}

// 调用
ObjectA *aObj = [[ObjectA alloc] init];

BytePtr a_isa = _isaForObject(aObj);
struct method_t *method = NULL;
uint32_t count;
_methodsWithIsa(a_isa, &method, &count);
_printMethods(method, count);

// 输出如下:
=============================
SEL:funcA
types:v16@0:8
imp:0x1021ddce8
=============================
SEL:funcB:
types:v24@0:8@16
imp:0x1021ddd14
=============================
SEL:b
types:B16@0:8
imp:0x1021ddd78
=============================