Created
January 16, 2016 10:34
-
-
Save sorbits/e1d5f2073c99d3f5fdbf to your computer and use it in GitHub Desktop.
Extract most C/Cocoa symbols via libclang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import <clang-c/Index.h>
#import <Foundation/Foundation.h>
#import <stdio.h>
#import <algorithm>
#import <cctype>
#import <cstdarg>
#import <cstdlib>
#import <cstring>
#import <iterator>
#import <map>
#import <regex>
#import <set>
#import <string>
#import <utility>
// Builds a single regular expression fragment that matches exactly the strings
// in [first, last). The strings are merged into a character trie so that shared
// prefixes are emitted once, e.g. { "NSArray", "NSString" } → "NS(?:Array|String)"
// and { "foo", "foobar" } → "foo(?:bar)?".
//
// Characters are copied verbatim (no regex escaping), so the input is expected
// to consist of identifier-like strings. An empty range yields an empty string.
template <typename _InputIter>
std::string strings_to_regexp (_InputIter first, _InputIter last)
{
	struct trie_t
	{
		// Walks (and extends where needed) the path spelled by [pos, end) and
		// marks the node reached as the end of a complete string.
		void insert (std::string::const_iterator pos, std::string::const_iterator end)
		{
			trie_t* node = this;
			for(; pos != end; ++pos)
				node = &node->_children[*pos];
			node->_is_word = true;
		}

		// Appends the pattern for everything below this node to `out`.
		void serialize (std::string& out) const
		{
			if(_children.empty())
				return;

			// A group is required when there are alternatives, or when the
			// remainder is optional because a string already ends here.
			bool const optional = _is_word;
			bool const grouped  = optional || _children.size() > 1;
			if(grouped)
				out += "(?:";

			auto it = _children.begin();
			while(it != _children.end())
			{
				out += it->first;
				it->second.serialize(out);
				if(++it != _children.end())
					out += '|';
			}

			if(grouped)
				out += optional ? ")?" : ")";
		}

	private:
		std::map<char, trie_t> _children;
		bool _is_word = false;
	};

	trie_t root;
	for(; first != last; ++first)
		root.insert(first->begin(), first->end());

	std::string pattern;
	root.serialize(pattern);
	return pattern;
}
// Returns true when the characters of `needle` appear in `haystack` in the same
// order, though not necessarily adjacent (i.e. `needle` is a subsequence of
// `haystack`). A needle character also matches its upper-case form in the
// haystack, so "abc" is found in "xAyBzC"; the reverse (upper-case needle,
// lower-case haystack) does not match. An empty needle always matches.
bool is_subset (std::string const& needle, std::string const& haystack)
{
	std::string::size_type matched = 0;
	for(std::string::size_type i = 0; matched < needle.size() && i < haystack.size(); ++i)
	{
		// toupper() is only defined for values representable as unsigned char
		// (or EOF), so never hand it a possibly negative plain char.
		unsigned char const want = static_cast<unsigned char>(needle[matched]);
		unsigned char const have = static_cast<unsigned char>(haystack[i]);
		if(want == have || toupper(want) == have)
			++matched;
	}
	return matched == needle.size();
}
inline std::string format (char const* format, ...) __attribute__ ((format (printf, 1, 2)));

// printf-style formatting into a std::string.
//
// Returns the formatted text, or an empty string when vasprintf() fails
// (e.g. out of memory or an encoding error).
inline std::string format (char const* format, ...)
{
	char* buffer = NULL;

	va_list ap;
	va_start(ap, format);
	int const length = vasprintf(&buffer, format, ap);
	va_end(ap);

	// On failure vasprintf() returns -1 and the buffer pointer is either NULL
	// or unspecified, so it must be neither read nor freed.
	if(length < 0 || !buffer)
		return std::string();

	std::string res(buffer);
	free(buffer);
	return res;
}
// Returns the name of the file containing the start of the cursor's source
// extent, or an empty string when libclang reports no file name for it.
std::string path_for_cursor (CXCursor cursor)
{
	CXFile file;
	clang_getFileLocation(clang_getRangeStart(clang_getCursorExtent(cursor)), &file, nullptr, nullptr, nullptr);

	CXString fileName = clang_getFileName(file);
	char const* cstr = clang_getCString(fileName);
	std::string res(cstr ? cstr : "");
	clang_disposeString(fileName);
	return res;
}
// Returns the spelling (name) of the entity the cursor refers to, or an empty
// string when libclang has no spelling for it.
std::string name_for_cursor (CXCursor cursor)
{
	CXString name = clang_getCursorSpelling(cursor);
	// clang_getCString() may return NULL; constructing a std::string from a
	// null pointer is undefined, so fall back to "" like path_for_cursor().
	char const* cstr = clang_getCString(name);
	std::string res(cstr ? cstr : "");
	clang_disposeString(name);
	return res;
}
// Collects the public Objective-C runtime / Cocoa symbols declared in `tu`,
// groups them by TextMate scope name and rewrites the `patterns` and
// `repository.protocols` entries of the grammar property list at `grammarPath`.
//
// Symbols are only considered when their declaring header matches one of the
// path patterns below, and when their name is non-empty and does not start
// with an underscore. For macOS ("macosx") availability attributes the scope is
// prefixed with `invalid.deprecated.10.x.` and/or suffixed with the 10.x
// version that introduced the symbol (only for versions after 10.7).
//
// The existing property list is loaded first; if it cannot be read nothing is
// written and an error is logged.
void cocoa (CXTranslationUnit tu, NSString* grammarPath)
{
	__block std::map<std::string, std::set<std::string>> info =
	{
		{ "storage.type.objc",       { "instancetype" } },
		{ "storage.type.cocoa.objc", { "IBOutlet", "IBAction" } }
	};

	clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
		// static: compile the regular expressions once rather than for every visited cursor.
		static struct { std::string name; std::regex path_match; } const scopes[] =
		{
			{ "",       std::regex(".*?/objc/(?:objc|NSObjCRuntime)\\.h") },
			{ ".cocoa", std::regex(".*?/(?:AddressBook|AppKit|ExceptionHandling|Foundation|WebKit)\\.framework/(?!.*\\.framework/).*") },
		};

		CXCursorKind const kind = clang_getCursorKind(cursor);

		std::string type;
		switch(kind)
		{
			case CXCursor_StructDecl:
			case CXCursor_EnumDecl:
			case CXCursor_TypedefDecl:       type = "support.type";           break;
			case CXCursor_FunctionDecl:      type = "support.function";       break;
			case CXCursor_ObjCProtocolDecl:  type = "support.other.protocol"; break;
			case CXCursor_ObjCInterfaceDecl: type = "support.class";          break;
			case CXCursor_VarDecl:           type = "support.variable";       break;
			case CXCursor_EnumConstantDecl:  type = "support.constant";       break;
			default:
				return CXChildVisit_Recurse;
		}

		// Copy the spelling right away so the CXString is released on every path.
		CXString spelling = clang_getCursorSpelling(cursor);
		char const* cstr = clang_getCString(spelling);
		std::string const name = cstr ? cstr : "";
		clang_disposeString(spelling);

		// Enum declarations are descended into so their constants get visited too.
		CXChildVisitResult next = kind == CXCursor_EnumDecl ? CXChildVisit_Recurse : CXChildVisit_Continue;
		if(name.empty() || name[0] == '_')
			return next;

		std::string const path = path_for_cursor(cursor);
		for(auto const& scope : scopes)
		{
			if(!std::regex_match(path, scope.path_match))
				continue;

			if(type == "support.type" && scope.name.empty())
				type = "storage.type";
			type += scope.name;

			// libclang returns the total number of platforms with availability
			// info, which may exceed the array size; only the first `capacity`
			// entries are populated.
			CXPlatformAvailability availability[2];
			int const capacity = sizeof(availability) / sizeof(availability[0]);
			int const reported = clang_getCursorPlatformAvailability(cursor, nullptr, nullptr, nullptr, nullptr, availability, capacity);
			int const filled   = std::min(reported, capacity);
			for(int i = 0; i < filled; ++i)
			{
				char const* platform = clang_getCString(availability[i].Platform);
				if(platform && strcmp("macosx", platform) == 0)
				{
					auto const& deprecated = availability[i].Deprecated;
					if(deprecated.Major == 10)
						type = "invalid.deprecated." + std::to_string(deprecated.Major) + "." + std::to_string(deprecated.Minor) + "." + type;

					auto const& introduced = availability[i].Introduced;
					if(introduced.Major == 10 && introduced.Minor > 7)
						type += "." + std::to_string(introduced.Major) + "." + std::to_string(introduced.Minor);
				}
				clang_disposeCXPlatformAvailability(&availability[i]);
			}

			info[type + ".objc"].insert(name);
			break; // first matching scope wins
		}
		return next;
	});

	NSMutableArray* patterns  = [NSMutableArray new];
	NSMutableArray* protocols = [NSMutableArray new];
	for(auto const& pair : info)
	{
		if(pair.second.empty())
			continue;

		std::string const str = strings_to_regexp(pair.second.begin(), pair.second.end());
		NSString* regexp = [NSString stringWithFormat:@"\\b%@\\b", @(str.c_str())];

		if(pair.first.find("support.other.protocol") == 0)
		{
			// Protocols go into the repository instead of the top-level patterns.
			[protocols addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
		else if(pair.first.find("support.function") != std::string::npos)
		{
			// Functions also capture leading whitespace under its own scope.
			[patterns addObject:@{
				@"match"    : [NSString stringWithFormat:@"(\\s*)(%@)", regexp],
				@"captures" : @{
					@"1" : @{ @"name" : @"punctuation.whitespace.support.function.leading.cocoa.objc" },
					@"2" : @{ @"name" : @(pair.first.c_str()) }
				}
			}];
		}
		else
		{
			[patterns addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
	}

	NSMutableDictionary* plist = [NSMutableDictionary dictionaryWithContentsOfFile:grammarPath];
	if(!plist)
	{
		NSLog(@"failed to load %@", grammarPath);
		return;
	}

	plist[@"patterns"]   = patterns;
	plist[@"repository"] = @{
		@"protocols" : @{
			@"patterns" : protocols
		}
	};

	if(![plist writeToFile:grammarPath atomically:YES])
		NSLog(@"failed to save %@", grammarPath);
}
// Collects the public C-level symbols (C library, pthread, dispatch,
// CoreFoundation, CoreGraphics, …) declared in `tu`, groups them by TextMate
// scope name and rewrites the `patterns` and `repository.functions` entries of
// the grammar property list at `grammarPath`.
//
// Symbols are only considered when their declaring header matches one of the
// path patterns below, and when their name is non-empty and does not start
// with an underscore.
//
// The existing property list is loaded first; if it cannot be read nothing is
// written and an error is logged.
void core_foundation (CXTranslationUnit tu, NSString* grammarPath)
{
	__block std::map<std::string, std::set<std::string>> info;

	clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
		// static: compile the regular expressions once rather than for every visited cursor.
		static struct { std::string name; std::regex path_match; } const scopes[] =
		{
			{ ".mac-classic", std::regex(".*/MacTypes\\.h") },
			{ ".pthread",     std::regex(".*/_?pthread(/.*|\\.h)") },
			{ ".dispatch",    std::regex(".*/dispatch/.*") },
			{ ".os",          std::regex(".*/(OSByteOrder|gethostuuid)\\.h") },
			{ ".cf",          std::regex(".*/CoreFoundation\\.framework/.*") },
			{ ".quartz",      std::regex(".*/CoreGraphics\\.framework/.*") },
			{ ".clib",        std::regex(".*/(alloca|ctype|_?locale|math|_?select|setjmp|signal|stdarg|stddef|stdint|stdio|stdlib|string|strings|time|types|unistd|sys/(fcntl|resource|select|types|wait)|_types/.*)\\.h$") },
		};

		CXCursorKind const kind = clang_getCursorKind(cursor);

		std::string type;
		switch(kind)
		{
			case CXCursor_StructDecl:
			case CXCursor_EnumDecl:
			case CXCursor_TypedefDecl:      type = "support.type";     break;
			case CXCursor_FunctionDecl:     type = "support.function"; break;
			case CXCursor_VarDecl:          type = "support.variable"; break;
			case CXCursor_EnumConstantDecl: type = "support.constant"; break;
			default:
				return CXChildVisit_Recurse;
		}

		// Copy the spelling right away so the CXString is released on every path.
		CXString spelling = clang_getCursorSpelling(cursor);
		char const* cstr = clang_getCString(spelling);
		std::string const name = cstr ? cstr : "";
		clang_disposeString(spelling);

		// Enum declarations are descended into so their constants get visited too.
		CXChildVisitResult next = kind == CXCursor_EnumDecl ? CXChildVisit_Recurse : CXChildVisit_Continue;
		if(name.empty() || name[0] == '_')
			return next;

		std::string const path = path_for_cursor(cursor);
		for(auto const& scope : scopes)
		{
			if(!std::regex_match(path, scope.path_match))
				continue;

			info[type + scope.name + ".c"].insert(name);
			break; // first matching scope wins
		}
		return next;
	});

	NSMutableArray* patterns  = [NSMutableArray new];
	NSMutableArray* functions = [NSMutableArray new];
	for(auto const& pair : info)
	{
		if(pair.second.empty())
			continue;

		std::string const str = strings_to_regexp(pair.second.begin(), pair.second.end());
		NSString* regexp = [NSString stringWithFormat:@"\\b%@\\b", @(str.c_str())];

		if(pair.first.find("support.function") == 0)
		{
			// Functions go into the repository and also capture leading
			// whitespace under its own scope.
			[functions addObject:@{
				@"match"    : [NSString stringWithFormat:@"(\\s*)(%@)", regexp],
				@"captures" : @{
					@"1" : @{ @"name" : @"punctuation.whitespace.support.function.leading.c" },
					@"2" : @{ @"name" : @(pair.first.c_str()) }
				}
			}];
		}
		else
		{
			[patterns addObject:@{
				@"name"  : @(pair.first.c_str()),
				@"match" : regexp
			}];
		}
	}

	NSMutableDictionary* plist = [NSMutableDictionary dictionaryWithContentsOfFile:grammarPath];
	if(!plist)
	{
		NSLog(@"failed to load %@", grammarPath);
		return;
	}

	plist[@"patterns"]   = patterns;
	plist[@"repository"] = @{
		@"functions" : @{
			@"patterns" : functions
		}
	};

	if(![plist writeToFile:grammarPath atomically:YES])
		NSLog(@"failed to save %@", grammarPath);
}
// Writes one tab-separated line per class/instance method found below `cursor`
// (an interface, protocol or category declaration) to `outArg` and returns the
// advanced output iterator.
//
// format of cocoa.txt is => method-name abbrv-framework class-type classname[;file-name]? (cm|im) return-type [arg-type]*
// class-type is always Cl, and the framework abbreviation is emitted as "?".
//
// For a category the class name is taken from the first class reference found
// among the category's children rather than from the category's own name.
template <typename _OutputIter>
_OutputIter handle_interface_or_category (CXCursor cursor, _OutputIter outArg)
{
	__block std::string ownerName = name_for_cursor(cursor);
	__block _OutputIter sink = outArg;

	bool const isCategory = clang_getCursorKind(cursor) == CXCursor_ObjCCategoryDecl;
	if(isCategory)
	{
		clang_visitChildrenWithBlock(cursor, ^(CXCursor child, CXCursor parent){
			if(clang_getCursorKind(child) == CXCursor_ObjCClassRef)
			{
				ownerName = name_for_cursor(child);
				return CXChildVisit_Break;
			}
			return CXChildVisit_Recurse;
		});
	}

	clang_visitChildrenWithBlock(cursor, ^(CXCursor child, CXCursor parent){
		CXCursorKind const kind = clang_getCursorKind(child);
		if(kind != CXCursor_ObjCClassMethodDecl && kind != CXCursor_ObjCInstanceMethodDecl)
			return CXChildVisit_Recurse;

		char const* const methodKind = kind == CXCursor_ObjCClassMethodDecl ? "cm" : "im";

		CXString returnSpelling = clang_getTypeSpelling(clang_getCursorResultType(child));
		std::string entry = format("%s\t?\tCl\t%s\t%s\t%s", name_for_cursor(child).c_str(), ownerName.c_str(), methodKind, clang_getCString(returnSpelling));
		clang_disposeString(returnSpelling);

		// One extra column per argument type, in declaration order.
		int const argCount = clang_Cursor_getNumArguments(child);
		for(int i = 0; i < argCount; ++i)
		{
			CXString argSpelling = clang_getTypeSpelling(clang_getCursorType(clang_Cursor_getArgument(child, i)));
			entry += format("\t%s", clang_getCString(argSpelling));
			clang_disposeString(argSpelling);
		}

		if(clang_Cursor_isVariadic(child))
			entry += "\t...";

		*sink++ = entry;
		return CXChildVisit_Continue;
	});

	return sink;
}
// Writes a single tab-separated description of the function declared at
// `cursor` to `out` and returns the advanced output iterator.
//
// The line has the form: function⇥(arg1, arg2, …)⇥return
// where each argument is its type spelling followed by the parameter name (if
// it has one), and a variadic function gets a trailing ", ...".
template <typename _OutputIter>
_OutputIter handle_function (CXCursor cursor, _OutputIter out)
{
	std::string const functionName = name_for_cursor(cursor);

	std::string params;
	int const argCount = clang_Cursor_getNumArguments(cursor);
	for(int i = 0; i < argCount; ++i)
	{
		if(i != 0)
			params += ", ";

		CXCursor const arg = clang_Cursor_getArgument(cursor, i);
		CXString spelling = clang_getTypeSpelling(clang_getCursorType(arg));
		params += clang_getCString(spelling);
		clang_disposeString(spelling);

		std::string const argName = name_for_cursor(arg);
		if(!argName.empty())
			params += " " + argName;
	}

	if(clang_Cursor_isVariadic(cursor))
		params += ", ...";

	CXString resultSpelling = clang_getTypeSpelling(clang_getCursorResultType(cursor));
	std::string const entry = functionName + "\t(" + params + ")\t" + clang_getCString(resultSpelling);
	clang_disposeString(resultSpelling);

	*out++ = entry;
	return out;
}
// Parses a fixed Objective-C++ source file against the macOS 10.10 SDK and
// regenerates the platform grammars from the symbols found (the enabled `#if`
// branch), or dumps method/class/protocol/function/constant lists to /tmp (the
// disabled `#else` branch).
//
// Returns 0 on success and 1 when the source file could not be parsed; in that
// case no output file is touched.
int main (int argc, char const* argv[])
{
	@autoreleasepool {
#if 0
		std::map<std::string, std::string> const frameworks = {
			{ "AddressBook",     "kAB|AB"               },
			{ "AE",              "kAE"                  },
			{ "AppKit",          "NS"                   },
			{ "CarbonCore",      "kFSOperation"         },
			{ "CFNetwork",       "kCF"                  },
			{ "ColorSync",       "kColorSync|kCMM"      },
			{ "CoreData",        "NS"                   },
			{ "CoreFoundation",  "kCF"                  },
			{ "CoreGraphics",    "kCG|CG"               },
			{ "CoreImage",       "kCI"                  },
			{ "CoreText",        "kCT"                  },
			{ "CoreVideo",       "kCV"                  },
			{ "DiskArbitration", "kDA"                  },
			{ "Foundation",      "NS"                   },
			{ "HIServices",      "kAX"                  },
			{ "ImageIO",         "kCGImage|kCF"         },
			{ "IOKit",           "kIO"                  },
			{ "IOSurface",       "kIOSurface"           },
			{ "LaunchServices",  "kLS|kUT"              },
			{ "Metadata",        "kMD"                  },
			{ "OSServices",      "kCS|kWS"              },
			{ "QD",              "kCMIlluminant"        },
			{ "QuartzCore",      "kCA|CA"               },
			{ "SearchKit",       "kSK"                  },
			{ "Security",        "kSec|gGuid|CSSMOID_"  },
			{ "SpeechSynthesis", "kSpeech"              },
		};
#endif

		char const* const sourcePath = "/Users/duff/Shared/Projects/Avian/Other/clang/test.mm";
		char const* args[] = { "-x", "objective-c++", "-std=c++1y", "-stdlib=libc++", "--sysroot", "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk" };

		CXIndex index = clang_createIndex(0, 0);
		CXTranslationUnit tu = clang_parseTranslationUnit(index, sourcePath, args, sizeof(args) / sizeof(args[0]), nullptr, 0, CXTranslationUnit_None);
		if(!tu)
		{
			// Without this check a failed parse would yield no symbols and the
			// grammar files would be rewritten with empty pattern lists.
			fprintf(stderr, "failed to parse %s\n", sourcePath);
			clang_disposeIndex(index);
			return 1;
		}

#if 01
		cocoa(tu, @"/Users/duff/Shared/System/Library/Application Support/Avian/Bundles/objective-c.tmbundle/Syntaxes/Platform.tmLanguage");
		core_foundation(tu, @"/Users/duff/Shared/System/Library/Application Support/Avian/Bundles/c.tmbundle/Syntaxes/Platform.tmLanguage");
#else
		std::regex const cocoaPathMatch(".*?/((AddressBook|AppKit|CoreData|CoreImage|ExceptionHandling|Foundation|ImageCaptureCore|ImageKit|PDFKit|PreferencePanes|QTKit|QuartzComposer|QuartzCore|QuartzFilters|QuickLookUI|ScreenSaver|WebKit)\\.framework/.*|objc/[^/]+\\.h)");

		__block std::set<std::string> interfaces;
		__block std::set<std::string> protocols;
		__block std::set<std::string> methods;
		__block std::set<std::string> functions;
		__block std::set<std::string> typedefs;
		__block std::set<std::string> types;

		// Anonymous enums are often paired with a typedef declared right before
		// or after them; these track the most recent typedef name and the
		// constants of the most recent enum that has not been named yet.
		__block std::string lastTypedefName;
		__block std::set<std::string> lastTypedefConstants;
		__block std::set<std::string> frameworks;

		clang_visitChildrenWithBlock(clang_getTranslationUnitCursor(tu), ^(CXCursor cursor, CXCursor parent){
			if(clang_getCursorAvailability(cursor) != CXAvailability_Available)
				return CXChildVisit_Continue;

			if(!std::regex_match(path_for_cursor(cursor), cocoaPathMatch))
				return CXChildVisit_Continue;

			switch(clang_getCursorKind(cursor))
			{
				case CXCursor_StructDecl:
					types.insert(name_for_cursor(cursor));
				break;

				case CXCursor_ObjCProtocolDecl:
				case CXCursor_ObjCInterfaceDecl:
				case CXCursor_ObjCCategoryDecl:
				{
					handle_interface_or_category(cursor, std::inserter(methods, methods.end()));
					if(clang_getCursorKind(cursor) == CXCursor_ObjCInterfaceDecl)
						interfaces.insert(name_for_cursor(cursor));
					else if(clang_getCursorKind(cursor) == CXCursor_ObjCProtocolDecl)
						protocols.insert(name_for_cursor(cursor));

					// std::smatch match;
					// std::regex_search(path_for_cursor(cursor), match, std::regex("/([^/]+)\\.framework(?!.*?\\.framework/)"));
					// if(match.size() == 2)
					// 	frameworks.insert(match[1].str());
				}
				break;

				case CXCursor_FunctionDecl:
					handle_function(cursor, std::inserter(functions, functions.end()));
				break;

				case CXCursor_EnumDecl:
				{
					if(!lastTypedefConstants.empty())
					{
						// fprintf(stderr, "unnamed constants (wasn’t ‘%s’) ·\n", lastTypedefName.c_str());
						// for(auto const& t : lastTypedefConstants)
						// 	fprintf(stderr, " - %s\n", t.c_str());
					}
					lastTypedefConstants.clear();

					clang_visitChildrenWithBlock(cursor, ^(CXCursor cursor, CXCursor parent){
						if(clang_getCursorKind(cursor) != CXCursor_EnumConstantDecl)
							return CXChildVisit_Recurse;
						lastTypedefConstants.insert(name_for_cursor(cursor));
						return CXChildVisit_Continue;
					});

					std::string name = name_for_cursor(cursor);
					if(name.empty() && !lastTypedefName.empty())
					{
						// Drop a trailing "Options" before comparing the typedef
						// name against the constant names.
						std::string tmp = lastTypedefName;
						if(tmp.size() >= 7 && tmp.compare(tmp.size()-7, 7, "Options") == 0)
							tmp = tmp.substr(0, tmp.size()-7);

						size_t subsetCount = 0;
						for(auto const& t : lastTypedefConstants)
							subsetCount += is_subset(tmp, t) ? 1 : 0;

						// Adopt the preceding typedef when most constants look derived from it.
						if(2*subsetCount > lastTypedefConstants.size())
						{
							name = lastTypedefName;
							fprintf(stderr, "fixed via previous typedef: %s\n", name.c_str());
						}
					}

					if(!name.empty())
					{
						for(auto const& t : lastTypedefConstants)
							typedefs.insert(t + "\t\t" + name);
						lastTypedefConstants.clear();
					}
				}
				break;

				case CXCursor_TypedefDecl:
				{
					lastTypedefName = name_for_cursor(cursor);
					if(!lastTypedefConstants.empty())
					{
						// Drop a trailing "Options" before comparing the typedef
						// name against the constant names.
						std::string tmp = lastTypedefName;
						if(tmp.size() >= 7 && tmp.compare(tmp.size()-7, 7, "Options") == 0)
							tmp = tmp.substr(0, tmp.size()-7);

						size_t subsetCount = 0;
						for(auto const& t : lastTypedefConstants)
							subsetCount += is_subset(tmp, t) ? 1 : 0;

						// Name the pending anonymous enum after this typedef
						// when most constants look derived from it.
						if(2*subsetCount > lastTypedefConstants.size())
						{
							fprintf(stderr, "fixed via following typedef: %s\n", lastTypedefName.c_str());
							for(auto const& t : lastTypedefConstants)
								typedefs.insert(t + "\t\t" + lastTypedefName);
							lastTypedefConstants.clear();
						}
					}

					if(!lastTypedefConstants.empty())
					{
						// fprintf(stderr, "unnamed constants (wasn’t ‘%s’)\n", lastTypedefName.c_str());
						// for(auto const& t : lastTypedefConstants)
						// 	fprintf(stderr, " - %s\n", t.c_str());
						lastTypedefConstants.clear();
					}
				}
				break;

				default:
					return CXChildVisit_Recurse;
				break;
			}
			return CXChildVisit_Continue;
		});

		std::map<char const*, std::set<std::string> const*> map =
		{
			{ "/tmp/cocoa.txt",          &methods    },
			{ "/tmp/CocoaClasses.txt",   &interfaces },
			{ "/tmp/CocoaProtocols.txt", &protocols  },
			{ "/tmp/CocoaFunctions.txt", &functions  },
			{ "/tmp/CocoaConstants.txt", &typedefs   },
			// { "/tmp/CocoaTypes.txt",     &types      },
		};

		for(auto const& pair : map)
		{
			if(FILE* fp = fopen(pair.first, "w"))
			{
				for(auto const& str : *pair.second)
					fprintf(fp, "%s\n", str.c_str());
				fclose(fp);
			}
		}
#endif

		clang_disposeTranslationUnit(tu);
		clang_disposeIndex(index);
	}
	return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment