Skip to content

Instantly share code, notes, and snippets.

@Catfish-Man

Catfish-Man/inline.m

Last active Mar 22, 2020
Embed
What would you like to do?
#import <Foundation/Foundation.h>
#import <assert.h>
//Compile with `clang -Os -framework Foundation -fno-objc-arc inline.m -o inline`, then run with `./inline clever` or `./inline naive`
/*
NaiveArray implements a simple immutable NSArray-like interface in probably the most obvious way: store a pointer to a C array of objects
*/
/// Immutable, NSArray-like container that keeps its elements in a separately
/// malloc'd C buffer, so each instance costs two heap allocations.
@interface NaiveArray : NSObject {
    NSUInteger count; // number of retained elements stored in `storage`
    id *storage;      // malloc'd buffer of `count` object pointers; NULL when the array is empty
}
/// Creates an autoreleased array that retains the first `count` pointers found in `objects`.
/// @param objects C array of at least `count` object pointers (may be NULL only when `count` is 0).
/// @param count   Number of elements to copy out of `objects`.
/// @return The new array, or nil if the element buffer could not be allocated.
+ (instancetype)arrayWithObjects:(id *)objects count:(NSUInteger) count;
/// Returns the element at `idx` without retaining it.
/// `idx` must be less than the element count; this is enforced with assert(),
/// so the check disappears when the file is compiled with NDEBUG.
- (id) objectAtIndex:(NSUInteger)idx;
@end

@implementation NaiveArray

+ (instancetype)arrayWithObjects:(id *)objects count:(NSUInteger) inCount {
    // Refuse counts whose byte size would wrap around and under-allocate the buffer.
    if (inCount > NSUIntegerMax / sizeof(id)) {
        return nil;
    }
    // Use `self` rather than a hard-coded class so subclasses get an instance of their own type.
    NaiveArray *a = [[self alloc] init]; //memory allocation 1
    if (a == nil) {
        return nil;
    }
    if (inCount > 0) {
        id *buffer = malloc(sizeof(id) * inCount); //memory allocation 2
        if (buffer == NULL) {
            // count is still 0 and storage still NULL here, so dealloc has nothing to undo.
            [a release];
            return nil;
        }
        for (NSUInteger i = 0; i < inCount; i++) {
            buffer[i] = [objects[i] retain];
        }
        // Publish the buffer only once every slot holds a retained pointer.
        a->storage = buffer;
        a->count = inCount;
    }
    return [a autorelease];
}

- (id) objectAtIndex:(NSUInteger)idx {
    assert(idx < count);
    return storage[idx];
}

- (void) dealloc {
    // Balance the retain taken on each element in +arrayWithObjects:count:.
    for (NSUInteger i = 0; i < count; i++) {
        [storage[i] release];
    }
    free(storage); //free 2 (free(NULL) is a no-op for empty arrays)
    [super dealloc]; //free 1
}

@end
/*
CleverArray is nearly identical, but uses a little trickery to be variably sized, so it can store the objects inside itself instead of pointing to separate storage
*/
/// Immutable, NSArray-like container that stores its elements in extra bytes
/// allocated at the end of the object itself, so each instance costs a single
/// heap allocation.
@interface CleverArray : NSObject {
    NSUInteger count; // number of retained elements stored in the trailing bytes
    //No storage ivar, we're going to expand the object to fit when we allocate it
}
/// Creates an autoreleased array that retains the first `count` pointers found in `objects`.
/// @param objects C array of at least `count` object pointers (may be NULL only when `count` is 0).
/// @param count   Number of elements to copy out of `objects`.
/// @return The new array, or nil if the object could not be allocated.
+ (instancetype)arrayWithObjects:(id *)objects count:(NSUInteger) count;
/// Returns the element at `idx` without retaining it.
/// `idx` must be less than the element count; this is enforced with assert(),
/// so the check disappears when the file is compiled with NDEBUG.
- (id) objectAtIndex:(NSUInteger)idx;
@end

@implementation CleverArray

+ (instancetype)arrayWithObjects:(id *)objects count:(NSUInteger) inCount {
    // Refuse counts whose byte size would wrap around and under-allocate the object.
    if (inCount > NSUIntegerMax / sizeof(id)) {
        return nil;
    }
    /* second arg is how many additional bytes to stick on the end of the object */
    CleverArray *a = NSAllocateObject(self, inCount * sizeof(id), NULL); //memory allocation 1
    if (a == nil) {
        return nil;
    }
    /* object_getIndexedIvars gets us a pointer to the additional bytes; we use it just like a separate storage buffer */
    id *storage = (id *)object_getIndexedIvars(a);
    for (NSUInteger i = 0; i < inCount; i++) {
        storage[i] = [objects[i] retain];
    }
    // Set the count last so it only ever describes slots that hold retained pointers.
    a->count = inCount;
    return [a autorelease];
}

- (id) objectAtIndex:(NSUInteger)idx {
    assert(idx < count);
    id *storage = (id *)object_getIndexedIvars(self);
    return storage[idx];
}

- (void) dealloc {
    id *storage = (id *)object_getIndexedIvars(self); //calling this isn't free, so let's not do it each time through the loop
    // Balance the retain taken on each element in +arrayWithObjects:count:.
    for (NSUInteger i = 0; i < count; i++) {
        [storage[i] release];
    }
    [super dealloc]; //free 1 (the elements live inside the object, so there is no second free)
}

@end
/*
 Entry point for the memory comparison. Expects exactly one argument:
 "clever" builds one million CleverArray instances; any other value
 (conventionally "naive") builds one million NaiveArray instances.
 The process then sleeps so its heap can be inspected from another terminal.
 Returns 1 after printing a usage message when the argument is missing.
 */
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Validate the command line explicitly: an assert() would vanish under NDEBUG
        // and let strcmp() dereference a NULL argv[1].
        if (argc != 2) {
            fprintf(stderr, "usage: %s clever|naive\n", argc > 0 ? argv[0] : "inline");
            return 1;
        }
        BOOL clever = strcmp(argv[1], "clever") == 0;
        id objects[3] = { @"test", @"test2", @"test3" };
        // Deliberately no per-iteration autorelease pool: every array has to stay alive
        // in the outer pool so that it shows up in the heap statistics quoted below.
        for (int i = 0; i < 1000000; i++) {
            if (clever) {
                /*
                 Physical footprint: 63.0M
                 COUNT BYTES AVG CLASS_NAME TYPE BINARY
                 ===== ===== === ========== ==== ======
                 1000000 48000000 48.0 CleverArray ObjC inline
                 */
                [CleverArray arrayWithObjects:objects count:3];
            } else {
                /*
                 Relevant output from `heap -sumObjectFields inline`
                 Physical footprint: 78.8M
                 COUNT BYTES AVG CLASS_NAME TYPE BINARY
                 ===== ===== === ========== ==== ======
                 1000000 32000000 32.0 NaiveArray ObjC inline
                 1000000 32000000 32.0 NaiveArray[16] ObjC inline
                 Beyond the obvious (paying for the extra 'storage' pointer with the naive version), each allocation has a cost as well.
                 One allocation of 2N bytes is usually, though not always, cheaper than 2 allocations of N bytes.
                 Finally, allocation sizes will be rounded up. In this case despite only needing 24 bytes for 3 object pointers, malloc gave us 32 bytes.
                 My usual mental model for this is that it allows me to remove the allocation and pointer-indirection overhead of the object "shell" around a C buffer.
                 */
                [NaiveArray arrayWithObjects:objects count:3];
            }
        }
        // Keep the process (and all one million arrays) alive long enough to run `heap` against it.
        sleep(1000);
    }
    return 0;
}
/*
Q&A
Q: Why not just have a zero-length C array as the last ivar, and use that to access the extra bytes?
A: That's only safe if you can absolutely guarantee that none of your superclasses will ever add an ivar without you recompiling, and that you can't be subclassed. Also be careful about alignment. Basically, I wouldn't recommend it. NSArray does it, but its superclasses (NSArray and NSObject) are built together with it as part of the OS, so it can get away with stuff like that. The speedup relative to object_getIndexedIvars() isn't very large anyway.
Q: Does this work with ARC?
A: Nope! If you want to play tricks like this you'll have to add a non-ARC file to put them in
Q: Should I use this instead of NSArray and friends?
A: Probably not. If you do, measure carefully. Saving the NSArray allocation can help, but NSArray can also do tricks like avoiding the objc_msgSend when retaining objects, which often is one of the biggest time sinks for things like this.
Q: Does the memory ratio above stay the same with more objects?
A: Nope, the more stuff you put in, the closer to lost in the noise these memory savings are. It can be a pretty big win to combine lots of tiny allocations, but combining a few large ones is usually not measurable.
Q: So it saves memory, what about time/cpu?
A: Mixed. Good: fewer mallocs, fewer frees, more cache-friendly. Bad: have to do a function call to get to your storage, sometimes combining allocations will push you up to a slower size to malloc. Overall, usually better though.
Q: What if I'm writing Swift?
A: Check out ManagedBuffer (https://developer.apple.com/documentation/swift/managedbuffer), it encapsulates this pattern in a way that works well with Swift
Q: What about C?
A: Use a zero-length (flexible) array as the last struct member. The subclassing issues that object_getIndexedIvars protects you from don't apply to C, and you already had to worry about alignment. So:
struct CleverCArray {
uint64_t count;
ElementType storage[];
};
struct CleverCArray *a = malloc(sizeof(struct CleverCArray) + (sizeof(ElementType) * count));
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment