Skip to content

Instantly share code, notes, and snippets.

@sorbits
Created January 16, 2016 10:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sorbits/e1d5f2073c99d3f5fdbf to your computer and use it in GitHub Desktop.
Save sorbits/e1d5f2073c99d3f5fdbf to your computer and use it in GitHub Desktop.
Extract most C/Cocoa symbols via libclang
#import <clang-c/Index.h>
#import <Foundation/Foundation.h>
#import <regex>
#import <stdio.h>
#import <map>
#import <string>
#import <set>
// Builds one regular expression that matches exactly the strings in [first, last).
//
// The strings are merged into a character trie so shared prefixes are emitted once, e.g.
// { "ab", "ac" } becomes "a(?:b|c)" and { "a", "ab" } becomes "a(?:b)?". Regex metacharacters
// occurring in the inputs are backslash-escaped so they match literally.
//
// Each element of the range must provide begin()/end() convertible to std::string::const_iterator.
// Returns an empty string for an empty range.
template <typename _InputIter>
std::string strings_to_regexp (_InputIter first, _InputIter last)
{
	struct node_t
	{
		// Inserts the characters [first, last) below this node and marks the final node as a word end.
		void add_string (std::string::const_iterator first, std::string::const_iterator last)
		{
			node_t* node = this;
			for(; first != last; ++first)
				node = &node->_nodes[*first]; // operator[] creates the child on first use
			node->_terminate = true;
		}

		// Appends the regexp for everything below this node to `out`.
		void to_s (std::string& out) const
		{
			if(_nodes.empty())
				return;

			// A group is required for alternation (several children) and for making the
			// remainder optional (a shorter string ends at this node).
			bool const grouped = _terminate || _nodes.size() > 1;
			if(grouped)
				out += "(?:";

			bool needsSeparator = false;
			for(auto const& pair : _nodes)
			{
				if(needsSeparator)
					out += '|';
				needsSeparator = true;

				switch(pair.first)
				{
					case '\\': case '^': case '$': case '.': case '|': case '?': case '*':
					case '+':  case '(': case ')': case '[': case ']': case '{': case '}':
						out += '\\';
					break;
				}
				out += pair.first;
				pair.second.to_s(out);
			}

			if(grouped)
				out += _terminate ? ")?" : ")";
		}

	private:
		std::map<char, node_t> _nodes;
		bool _terminate = false;
	};

	node_t root;
	for(auto it = first; it != last; ++it)
		root.add_string(it->begin(), it->end());

	std::string res;
	root.to_s(res);
	return res;
}
// Returns true when the characters of `needle` occur in `haystack` in the same order (not
// necessarily adjacent). A needle character matches either itself or its uppercase form, so
// "abc" is found in "xAyBzC" while "ABC" is not found in "abc". An empty needle always matches.
bool is_subset (std::string const& needle, std::string const& haystack)
{
	std::string::size_type n = 0, m = 0;
	while(n < needle.size() && m < haystack.size())
	{
		// toupper() is only defined for values representable as unsigned char (or EOF), so the
		// possibly negative plain char is converted before the call.
		unsigned char const wanted = static_cast<unsigned char>(needle[n]);
		unsigned char const actual = static_cast<unsigned char>(haystack[m]);
		if(wanted == actual || toupper(wanted) == actual)
			++n;
		++m;
	}
	return n == needle.size();
}
// printf-style formatting into a std::string. Returns an empty string when vasprintf() fails.
inline std::string format (char const* format, ...) __attribute__ ((format (printf, 1, 2)));
inline std::string format (char const* format, ...)
{
	char* tmp = NULL;
	va_list ap;
	va_start(ap, format);
	int const len = vasprintf(&tmp, format, ap);
	va_end(ap);

	// On failure the buffer pointer must not be used (nor freed), so bail out before touching it.
	if(len < 0 || !tmp)
		return std::string();

	std::string res(tmp);
	free(tmp);
	return res;
}
// Returns the name of the file containing the start of `cursor`'s extent, or an empty string
// when libclang reports no file name for it.
std::string path_for_cursor (CXCursor cursor)
{
	CXFile file;
	clang_getFileLocation(clang_getRangeStart(clang_getCursorExtent(cursor)), &file, nullptr, nullptr, nullptr);

	CXString filename = clang_getFileName(file);
	char const* cstr = clang_getCString(filename);
	std::string path(cstr ? cstr : "");
	clang_disposeString(filename);
	return path;
}
// Returns the spelling of `cursor`, or an empty string when libclang has no C string for it.
std::string name_for_cursor (CXCursor cursor)
{
	CXString name = clang_getCursorSpelling(cursor);
	// clang_getCString() may return NULL; constructing a std::string from NULL is undefined,
	// so fall back to "" the same way path_for_cursor() does.
	char const* cstr = clang_getCString(name);
	std::string res(cstr ? cstr : "");
	clang_disposeString(name);
	return res;
}
// Collects the spelling of every struct, enum, typedef, function, protocol, interface, variable
// and enum-constant declaration in `tu` whose source path matches one of the header patterns
// below, groups the names by scope name, and stores one regexp rule per group in the property
// list at `grammarPath`: its "patterns" and "repository" keys are replaced and the file is
// written back in place. Names that are empty or start with an underscore are skipped.
//
// For declarations carrying "macosx" availability info the scope name is decorated: a
// deprecation in 10.x adds an "invalid.deprecated.10.x." prefix, an introduction after 10.7
// adds a ".10.x" suffix.
void cocoa (CXTranslationUnit tu, NSString* grammarPath)
{
	__block std::map<std::string, std::set<std::string>> info =
	{
		{ "storage.type.objc",       { "instancetype" } },
		{ "storage.type.cocoa.objc", { "IBOutlet", "IBAction" } }
	};

	// Compiled once up front instead of once per visited cursor. The visitor block only captures
	// pointers into this array, which is safe because the block runs synchronously below.
	// NOTE(review): the '.' before 'h' in the first pattern is unescaped and matches any character.
	struct scope_t { std::string name; std::regex path_match; };
	scope_t const scopes[] =
	{
		{ "",       std::regex(".*?/objc/(?:objc|NSObjCRuntime).h") },
		{ ".cocoa", std::regex(".*?/(?:AddressBook|AppKit|ExceptionHandling|Foundation|WebKit)\\.framework/(?!.*\\.framework/).*") },
	};
	scope_t const* const firstScope = scopes;
	scope_t const* const lastScope  = scopes + sizeof(scopes) / sizeof(scopes[0]);

	clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
		enum CXCursorKind kind = clang_getCursorKind(cursor);

		std::string type;
		switch(kind)
		{
			case CXCursor_StructDecl:
			case CXCursor_EnumDecl:
			case CXCursor_TypedefDecl:       type = "support.type";           break;
			case CXCursor_FunctionDecl:      type = "support.function";       break;
			case CXCursor_ObjCProtocolDecl:  type = "support.other.protocol"; break;
			case CXCursor_ObjCInterfaceDecl: type = "support.class";          break;
			case CXCursor_VarDecl:           type = "support.variable";       break;
			case CXCursor_EnumConstantDecl:  type = "support.constant";       break;
			default:
				return CXChildVisit_Recurse;
		}

		// Enum declarations are descended into so that their constants get visited as well.
		enum CXChildVisitResult next = kind == CXCursor_EnumDecl ? CXChildVisit_Recurse : CXChildVisit_Continue;

		CXString name = clang_getCursorSpelling(cursor);
		char const* str = clang_getCString(name);
		if(!str || !*str || str[0] == '_')
		{
			clang_disposeString(name); // every exit path must release the spelling
			return next;
		}

		std::string const path = path_for_cursor(cursor);
		for(scope_t const* scope = firstScope; scope != lastScope; ++scope)
		{
			if(!std::regex_match(path, scope->path_match))
				continue;

			if(type == "support.type" && scope->name.empty())
				type = "storage.type";
			type += scope->name;

			// The call returns the total number of platforms, which may exceed the buffer size;
			// only the entries that actually fit are valid and need disposing.
			CXPlatformAvailability availability[8];
			int const capacity = sizeof(availability) / sizeof(availability[0]);
			int const reported = clang_getCursorPlatformAvailability(cursor, nullptr, nullptr, nullptr, nullptr, availability, capacity);
			int const filled   = reported < capacity ? reported : capacity;
			for(int i = 0; i < filled; ++i)
			{
				char const* platform = clang_getCString(availability[i].Platform);
				if(platform && strcmp("macosx", platform) == 0)
				{
					auto const& deprecated = availability[i].Deprecated;
					if(deprecated.Major == 10)
						type = "invalid.deprecated." + std::to_string(deprecated.Major) + "." + std::to_string(deprecated.Minor) + "." + type;

					auto const& introduced = availability[i].Introduced;
					if(introduced.Major == 10 && introduced.Minor > 7)
						type += "." + std::to_string(introduced.Major) + "." + std::to_string(introduced.Minor);
				}
				clang_disposeCXPlatformAvailability(&availability[i]);
			}

			info[type + ".objc"].insert(str);
			break; // first matching scope wins
		}

		clang_disposeString(name);
		return next;
	});

	NSMutableArray* patterns  = [NSMutableArray new];
	NSMutableArray* protocols = [NSMutableArray new];
	for(auto const& pair : info)
	{
		if(pair.second.empty())
			continue;

		std::string const str = strings_to_regexp(pair.second.begin(), pair.second.end());
		NSString* regexp = [NSString stringWithFormat:@"\\b%@\\b", @(str.c_str())];

		if(pair.first.find("support.other.protocol") == 0)
		{
			// Protocol rules go into the repository rather than the top-level pattern list.
			[protocols addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
		else if(pair.first.find("support.function") != std::string::npos)
		{
			// Function rules also capture leading whitespace under its own scope name.
			[patterns addObject:@{
				@"match"    : [NSString stringWithFormat:@"(\\s*)(%@)", regexp],
				@"captures" : @{
					@"1" : @{ @"name" : @"punctuation.whitespace.support.function.leading.cocoa.objc" },
					@"2" : @{ @"name" : @(pair.first.c_str()) }
				}
			}];
		}
		else
		{
			[patterns addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
	}

	NSMutableDictionary* plist = [NSMutableDictionary dictionaryWithContentsOfFile:grammarPath];
	if(!plist)
	{
		// Without the existing grammar there is nothing to update; report the real cause.
		NSLog(@"failed to load %@", grammarPath);
		return;
	}

	plist[@"patterns"]   = patterns;
	plist[@"repository"] = @{
		@"protocols" : @{
			@"patterns" : protocols
		}
	};

	if(![plist writeToFile:grammarPath atomically:YES])
		NSLog(@"failed to save %@", grammarPath);
}
// Collects the spelling of every struct, enum, typedef, function, variable and enum-constant
// declaration in `tu` whose source path matches one of the header patterns below, groups the
// names by scope name ("<kind><scope>.c"), and stores one regexp rule per group in the property
// list at `grammarPath`: its "patterns" and "repository" keys are replaced and the file is
// written back in place. Names that are empty or start with an underscore are skipped.
void core_foundation (CXTranslationUnit tu, NSString* grammarPath)
{
	__block std::map<std::string, std::set<std::string>> info;

	// Compiled once up front instead of once per visited cursor. The visitor block only captures
	// pointers into this array, which is safe because the block runs synchronously below.
	struct scope_t { std::string name; std::regex path_match; };
	scope_t const scopes[] =
	{
		{ ".mac-classic", std::regex(".*/MacTypes\\.h") },
		{ ".pthread",     std::regex(".*/_?pthread(/.*|\\.h)") },
		{ ".dispatch",    std::regex(".*/dispatch/.*") },
		{ ".os",          std::regex(".*/(OSByteOrder|gethostuuid)\\.h") },
		{ ".cf",          std::regex(".*/CoreFoundation\\.framework/.*") },
		{ ".quartz",      std::regex(".*/CoreGraphics\\.framework/.*") },
		{ ".clib",        std::regex(".*/(alloca|ctype|_?locale|math|_?select|setjmp|signal|stdarg|stddef|stdint|stdio|stdlib|string|strings|time|types|unistd|sys/(fcntl|resource|select|types|wait)|_types/.*)\\.h$") },
	};
	scope_t const* const firstScope = scopes;
	scope_t const* const lastScope  = scopes + sizeof(scopes) / sizeof(scopes[0]);

	clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
		enum CXCursorKind kind = clang_getCursorKind(cursor);

		std::string type;
		switch(kind)
		{
			case CXCursor_StructDecl:
			case CXCursor_EnumDecl:
			case CXCursor_TypedefDecl:      type = "support.type";     break;
			case CXCursor_FunctionDecl:     type = "support.function"; break;
			case CXCursor_VarDecl:          type = "support.variable"; break;
			case CXCursor_EnumConstantDecl: type = "support.constant"; break;
			default:
				return CXChildVisit_Recurse;
		}

		// Enum declarations are descended into so that their constants get visited as well.
		enum CXChildVisitResult next = kind == CXCursor_EnumDecl ? CXChildVisit_Recurse : CXChildVisit_Continue;

		CXString name = clang_getCursorSpelling(cursor);
		char const* str = clang_getCString(name);
		if(!str || !*str || str[0] == '_')
		{
			clang_disposeString(name); // every exit path must release the spelling
			return next;
		}

		std::string const path = path_for_cursor(cursor);
		for(scope_t const* scope = firstScope; scope != lastScope; ++scope)
		{
			if(!std::regex_match(path, scope->path_match))
				continue;

			info[type + scope->name + ".c"].insert(str);
			break; // first matching scope wins
		}

		clang_disposeString(name);
		return next;
	});

	NSMutableArray* patterns  = [NSMutableArray new];
	NSMutableArray* functions = [NSMutableArray new];
	for(auto const& pair : info)
	{
		if(pair.second.empty())
			continue;

		std::string const str = strings_to_regexp(pair.second.begin(), pair.second.end());
		NSString* regexp = [NSString stringWithFormat:@"\\b%@\\b", @(str.c_str())];

		if(pair.first.find("support.function") == 0)
		{
			// Function rules go into the repository and also capture leading whitespace.
			[functions addObject:@{
				@"match"    : [NSString stringWithFormat:@"(\\s*)(%@)", regexp],
				@"captures" : @{
					@"1" : @{ @"name" : @"punctuation.whitespace.support.function.leading.c" },
					@"2" : @{ @"name" : @(pair.first.c_str()) }
				}
			}];
		}
		else
		{
			[patterns addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
	}

	NSMutableDictionary* plist = [NSMutableDictionary dictionaryWithContentsOfFile:grammarPath];
	if(!plist)
	{
		// Without the existing grammar there is nothing to update; report the real cause.
		NSLog(@"failed to load %@", grammarPath);
		return;
	}

	plist[@"patterns"]   = patterns;
	plist[@"repository"] = @{
		@"functions" : @{
			@"patterns" : functions
		}
	};

	if(![plist writeToFile:grammarPath atomically:YES])
		NSLog(@"failed to save %@", grammarPath);
}
// Writes one tab-separated record per class/instance method found below `cursor` to `outArg`
// and returns the advanced output iterator.
//
// Record layout (cocoa.txt):
//   method-name abbrv-framework class-type classname[;file-name]? (cm|im) return-type [arg-type]*
// The framework column is written as "?" and class-type is always "Cl".
template <typename _OutputIter>
_OutputIter handle_interface_or_category (CXCursor cursor, _OutputIter outArg)
{
	__block std::string className = name_for_cursor(cursor);
	__block auto sink = outArg;

	bool const isCategory = clang_getCursorKind(cursor) == CXCursor_ObjCCategoryDecl;
	if(isCategory)
	{
		// For a category, use the spelling of the first class reference found among its children.
		clang_visitChildrenWithBlock(cursor, ^(CXCursor child, CXCursor parent){
			if(clang_getCursorKind(child) == CXCursor_ObjCClassRef)
			{
				className = name_for_cursor(child);
				return CXChildVisit_Break;
			}
			return CXChildVisit_Recurse;
		});
	}

	clang_visitChildrenWithBlock(cursor, ^(CXCursor child, CXCursor parent){
		enum CXCursorKind kind = clang_getCursorKind(child);
		if(kind != CXCursor_ObjCClassMethodDecl && kind != CXCursor_ObjCInstanceMethodDecl)
			return CXChildVisit_Recurse;

		char const* methodKind = kind == CXCursor_ObjCClassMethodDecl ? "cm" : "im";

		CXString returnSpelling = clang_getTypeSpelling(clang_getCursorResultType(child));
		std::string record = format("%s\t?\tCl\t%s\t%s\t%s", name_for_cursor(child).c_str(), className.c_str(), methodKind, clang_getCString(returnSpelling));
		clang_disposeString(returnSpelling);

		// One extra column per declared argument type, in declaration order.
		int const argCount = clang_Cursor_getNumArguments(child);
		for(int index = 0; index < argCount; ++index)
		{
			CXString argSpelling = clang_getTypeSpelling(clang_getCursorType(clang_Cursor_getArgument(child, index)));
			record += format("\t%s", clang_getCString(argSpelling));
			clang_disposeString(argSpelling);
		}

		if(clang_Cursor_isVariadic(child))
			record += "\t...";

		*sink++ = record;
		return CXChildVisit_Continue;
	});

	return sink;
}
// Writes a single record describing the function at `cursor` to `out` and returns the advanced
// output iterator. Record layout: function⇥(arg1, arg2, …)⇥return
// Each argument is written as its type spelling followed by the parameter name when it has one;
// variadic functions end their argument list with "...".
template <typename _OutputIter>
_OutputIter handle_function (CXCursor cursor, _OutputIter out)
{
	std::string line = name_for_cursor(cursor) + "\t(";

	int const argCount = clang_Cursor_getNumArguments(cursor);
	for(int i = 0; i < argCount; ++i)
	{
		if(i > 0)
			line += ", ";

		CXCursor argCursor = clang_Cursor_getArgument(cursor, i);
		CXString typeSpelling = clang_getTypeSpelling(clang_getCursorType(argCursor));
		if(char const* spelling = clang_getCString(typeSpelling)) // NULL must not reach operator+=
			line += spelling;
		clang_disposeString(typeSpelling);

		std::string argName = name_for_cursor(argCursor);
		if(!argName.empty())
			line += " " + argName;
	}

	// Only emit the separator when named parameters precede the ellipsis, so that a function
	// declared as f(...) is written as "(...)" rather than "(, ...)".
	if(clang_Cursor_isVariadic(cursor))
		line += argCount > 0 ? ", ..." : "...";
	line += ")\t";

	CXString resultTypeStr = clang_getTypeSpelling(clang_getCursorResultType(cursor));
	if(char const* spelling = clang_getCString(resultTypeStr))
		line += spelling;
	clang_disposeString(resultTypeStr);

	*out++ = line;
	return out;
}
// Parses a fixed Objective-C++ source file with libclang and, depending on the `#if` below,
// either regenerates the two Platform.tmLanguage grammars (cocoa() / core_foundation()) or dumps
// method, class, protocol, function and constant lists to text files under /tmp.
//
// Returns 0 on success and 1 when the translation unit could not be parsed; in that case no
// grammar or list file is touched.
int main (int argc, char const* argv[])
{
#if 0
	std::map<std::string, std::string> const frameworks = {
		{ "AddressBook",     "kAB|AB" },
		{ "AE",              "kAE" },
		{ "AppKit",          "NS" },
		{ "CarbonCore",      "kFSOperation" },
		{ "CFNetwork",       "kCF" },
		{ "ColorSync",       "kColorSync|kCMM" },
		{ "CoreData",        "NS" },
		{ "CoreFoundation",  "kCF" },
		{ "CoreGraphics",    "kCG|CG" },
		{ "CoreImage",       "kCI" },
		{ "CoreText",        "kCT" },
		{ "CoreVideo",       "kCV" },
		{ "DiskArbitration", "kDA" },
		{ "Foundation",      "NS" },
		{ "HIServices",      "kAX" },
		{ "ImageIO",         "kCGImage|kCF" },
		{ "IOKit",           "kIO" },
		{ "IOSurface",       "kIOSurface" },
		{ "LaunchServices",  "kLS|kUT" },
		{ "Metadata",        "kMD" },
		{ "OSServices",      "kCS|kWS" },
		{ "QD",              "kCMIlluminant" },
		{ "QuartzCore",      "kCA|CA" },
		{ "SearchKit",       "kSK" },
		{ "Security",        "kSec|gGuid|CSSMOID_" },
		{ "SpeechSynthesis", "kSpeech" },
	};
#endif

	// Foundation objects are created (and autoreleased) below, so a pool must be in place.
	@autoreleasepool {
		char const* const sourcePath = "/Users/duff/Shared/Projects/Avian/Other/clang/test.mm";
		char const* args[] = { "-x", "objective-c++", "-std=c++1y", "-stdlib=libc++", "--sysroot", "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk" };

		CXIndex index = clang_createIndex(0, 0);
		CXTranslationUnit tu = clang_parseTranslationUnit(index, sourcePath, args, sizeof(args) / sizeof(args[0]), nullptr, 0, CXTranslationUnit_None);
		if(!tu)
		{
			// Continuing would visit nothing and overwrite the outputs with empty lists.
			fprintf(stderr, "failed to parse %s\n", sourcePath);
			clang_disposeIndex(index);
			return 1;
		}

#if 01
		cocoa(tu, @"/Users/duff/Shared/System/Library/Application Support/Avian/Bundles/objective-c.tmbundle/Syntaxes/Platform.tmLanguage");
		core_foundation(tu, @"/Users/duff/Shared/System/Library/Application Support/Avian/Bundles/c.tmbundle/Syntaxes/Platform.tmLanguage");
#else
		std::regex const cocoaPathMatch(".*?/((AddressBook|AppKit|CoreData|CoreImage|ExceptionHandling|Foundation|ImageCaptureCore|ImageKit|PDFKit|PreferencePanes|QTKit|QuartzComposer|QuartzCore|QuartzFilters|QuickLookUI|ScreenSaver|WebKit)\\.framework/.*|objc/[^/]+\\.h)");

		__block std::set<std::string> interfaces;
		__block std::set<std::string> protocols;
		__block std::set<std::string> methods;
		__block std::set<std::string> functions;
		__block std::set<std::string> typedefs;
		__block std::set<std::string> types;

		// Anonymous enums are associated with a neighbouring typedef: the most recent typedef
		// name and the constants of the most recent still-unnamed enum are remembered here.
		__block std::string lastTypedefName;
		__block std::set<std::string> lastTypedefConstants;
		__block std::set<std::string> frameworks;

		clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
			if(clang_getCursorAvailability(cursor) != CXAvailability_Available)
				return CXChildVisit_Continue;

			if(!std::regex_match(path_for_cursor(cursor), cocoaPathMatch))
				return CXChildVisit_Continue;

			switch(clang_getCursorKind(cursor))
			{
				case CXCursor_StructDecl:
					types.insert(name_for_cursor(cursor));
				break;

				case CXCursor_ObjCProtocolDecl:
				case CXCursor_ObjCInterfaceDecl:
				case CXCursor_ObjCCategoryDecl:
				{
					handle_interface_or_category(cursor, std::inserter(methods, methods.end()));
					if(clang_getCursorKind(cursor) == CXCursor_ObjCInterfaceDecl)
						interfaces.insert(name_for_cursor(cursor));
					else if(clang_getCursorKind(cursor) == CXCursor_ObjCProtocolDecl)
						protocols.insert(name_for_cursor(cursor));

					// std::smatch match;
					// std::regex_search(path_for_cursor(cursor), match, std::regex("/([^/]+)\\.framework(?!.*?\\.framework/)"));
					// if(match.size() == 2)
					// 	frameworks.insert(match[1].str());
				}
				break;

				case CXCursor_FunctionDecl:
					handle_function(cursor, std::inserter(functions, functions.end()));
				break;

				case CXCursor_EnumDecl:
				{
					if(!lastTypedefConstants.empty())
					{
						// fprintf(stderr, "unnamed constants (wasn’t ‘%s’) ·\n", lastTypedefName.c_str());
						// for(auto const& t : lastTypedefConstants)
						// 	fprintf(stderr, " - %s\n", t.c_str());
						lastTypedefConstants.clear();
					}

					clang_visitChildrenWithBlock(cursor, ^(CXCursor cursor, CXCursor parent){
						if(clang_getCursorKind(cursor) != CXCursor_EnumConstantDecl)
							return CXChildVisit_Recurse;
						lastTypedefConstants.insert(name_for_cursor(cursor));
						return CXChildVisit_Continue;
					});

					std::string name = name_for_cursor(cursor);
					if(name.empty() && !lastTypedefName.empty())
					{
						// Compare against the typedef name without a trailing "Options".
						std::string const suffix = "Options";
						std::string tmp = lastTypedefName;
						if(tmp.size() >= suffix.size() && tmp.compare(tmp.size() - suffix.size(), suffix.size(), suffix) == 0)
							tmp.erase(tmp.size() - suffix.size());

						size_t subsetCount = 0;
						for(auto const& t : lastTypedefConstants)
							subsetCount += is_subset(tmp, t) ? 1 : 0;

						// Adopt the previous typedef when more than half of the constants match it.
						if(2*subsetCount > lastTypedefConstants.size())
						{
							name = lastTypedefName;
							fprintf(stderr, "fixed via previous typedef: %s\n", name.c_str());
						}
					}

					if(!name.empty())
					{
						for(auto const& t : lastTypedefConstants)
							typedefs.insert(t + "\t\t" + name);
						lastTypedefConstants.clear();
					}
				}
				break;

				case CXCursor_TypedefDecl:
				{
					lastTypedefName = name_for_cursor(cursor);
					if(!lastTypedefConstants.empty())
					{
						// Compare against the typedef name without a trailing "Options".
						std::string const suffix = "Options";
						std::string tmp = lastTypedefName;
						if(tmp.size() >= suffix.size() && tmp.compare(tmp.size() - suffix.size(), suffix.size(), suffix) == 0)
							tmp.erase(tmp.size() - suffix.size());

						size_t subsetCount = 0;
						for(auto const& t : lastTypedefConstants)
							subsetCount += is_subset(tmp, t) ? 1 : 0;

						// Adopt this typedef when more than half of the pending constants match it.
						if(2*subsetCount > lastTypedefConstants.size())
						{
							fprintf(stderr, "fixed via following typedef: %s\n", lastTypedefName.c_str());
							for(auto const& t : lastTypedefConstants)
								typedefs.insert(t + "\t\t" + lastTypedefName);
							lastTypedefConstants.clear();
						}
					}

					if(!lastTypedefConstants.empty())
					{
						// fprintf(stderr, "unnamed constants (wasn’t ‘%s’)\n", lastTypedefName.c_str());
						// for(auto const& t : lastTypedefConstants)
						// 	fprintf(stderr, " - %s\n", t.c_str());
						lastTypedefConstants.clear();
					}
				}
				break;

				default:
					return CXChildVisit_Recurse;
			}
			return CXChildVisit_Continue;
		});

		// Keyed by path string (not by pointer) so the iteration order is well defined.
		std::map<std::string, std::set<std::string> const*> const outputs =
		{
			{ "/tmp/cocoa.txt",          &methods    },
			{ "/tmp/CocoaClasses.txt",   &interfaces },
			{ "/tmp/CocoaProtocols.txt", &protocols  },
			{ "/tmp/CocoaFunctions.txt", &functions  },
			{ "/tmp/CocoaConstants.txt", &typedefs   },
			// { "/tmp/CocoaTypes.txt",     &types      },
		};

		for(auto const& pair : outputs)
		{
			if(FILE* fp = fopen(pair.first.c_str(), "w"))
			{
				for(auto const& str : *pair.second)
					fprintf(fp, "%s\n", str.c_str());
				fclose(fp);
			}
			else
			{
				perror(pair.first.c_str());
			}
		}
#endif

		clang_disposeTranslationUnit(tu);
		clang_disposeIndex(index);
	}
	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment