- `json.hpp`, the single-include header from https://github.com/nlohmann/json
- PHP source code
- python3.8
- g++ --std=c++11
http://www.hackingwithphp.com/4/2/0/how-to-read-function-prototypes
python3 main.py
http://www.hackingwithphp.com/4/2/0/how-to-read-function-prototypes
python3 main.py
import os | |
import re | |
import json | |
import subprocess | |
from tokenizer import tokenize | |
# Compile the C++ comment extractor before anything else runs. create_db()
# launches the resulting ./parser binary for every file it scans, so a failed
# build must stop the script instead of being silently ignored.
subprocess.run(
    ['g++', '--std=c++11', 'parser.cpp', '-o', 'parser', '-O2'],
    check=True,
)
# Matches one bracketed optional group such as "[, int flags = 0]". The
# trailing greedy part means nested groups are removed together with the
# outermost one.
_OPTIONAL_GROUP_RE = re.compile(
    r'\[[ ,]*(\$?[a-z][ a-z0-9_.]* ?=?(.*)?)\]', re.IGNORECASE
)


def _split_declaration(decl):
    """Split one parameter declaration into ``(tokens, default)``.

    ``tokens`` are the whitespace-separated words before the first ``=``;
    ``default`` is the stripped text after it, or ``None`` when there is no
    ``=`` at all.
    """
    head, sep, default = decl.partition('=')
    return head.split(), (default.strip() if sep else None)


def parse_prototype(loc, info):
    """Expand the raw parameter string of a parsed prototype.

    Args:
        loc: Source location to record under ``info['meta']['location']``.
        info: Prototype record; must contain a ``'meta'`` dict. Its
            ``'params'`` entry (raw text between the prototype's parentheses,
            with optional parameters wrapped in ``[...]``) is replaced by a
            list of dicts.

    Returns:
        The same ``info`` object, mutated in place. Each entry of
        ``info['params']`` has ``'name'`` and ``'optional'``, plus ``'type'``
        when the declaration has more than one word and ``'default'`` when it
        contains ``=``.
    """
    info['meta']['location'] = loc
    # A missing or empty parameter string simply yields no parameters.
    params = (info.get('params') or '').strip()
    plist = []
    if params and params != 'void':
        # Pass 1: ignore the optional-group brackets and describe every
        # parameter, assuming it is optional until proven otherwise.
        flattened = params.replace('[', '').replace(']', '')
        for decl in flattened.split(','):
            tokens, default = _split_declaration(decl)
            if not tokens:
                continue
            entry = {'optional': True}
            if default is not None:
                entry['default'] = default
            if len(tokens) > 1:
                entry['type'] = tokens[0]
            entry['name'] = tokens[-1]
            plist.append(entry)

        # Pass 2: repeatedly delete bracketed groups; what is left over is
        # the list of required parameters.
        required_text = params
        while True:
            reduced = _OPTIONAL_GROUP_RE.sub('', required_text).strip()
            if reduced == required_text:
                break
            required_text = reduced

        required = set()
        for decl in required_text.split(','):
            tokens, _ = _split_declaration(decl)
            if not tokens:
                continue
            # "type name" -> second word; a lone word is an untyped name.
            name = tokens[1] if len(tokens) > 1 else tokens[0]
            # A bare ellipsis stands for "zero or more", so it stays optional.
            if name != '...':
                required.add(name)

        for entry in plist:
            if entry['name'] in required:
                entry['optional'] = False
    info['params'] = plist
    return info
def create_db(version, directory):
    """Extract every documented prototype under *directory* into a JSON file.

    Walks *directory*, runs the ``./parser`` executable on each ``.c`` file
    whose name does not contain ``test``, post-processes every record with
    ``parse_prototype`` and writes the combined list to ``<version>.json`` in
    the current working directory.

    Args:
        version: Label used as the stem of the output file name.
        directory: Root of the source tree to scan; a trailing path
            separator is not required.
    """
    import sys  # local import: only needed for diagnostics on stderr

    results = []
    for path, _dirs, files in os.walk(directory, topdown=True):
        for filename in files:
            if not filename.endswith('.c') or 'test' in filename:
                continue
            source = os.path.join(path, filename)
            # Location relative to the scanned root, independent of whether
            # the caller passed a trailing slash.
            location = os.path.relpath(source, directory)
            proc = subprocess.run(
                ['./parser', source],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            try:
                comments = json.loads(proc.stdout)
            except ValueError:
                # The parser crashed or emitted something that is not JSON;
                # report the file and keep going with the rest of the tree.
                print(
                    f'skipping {source}: parser exited with '
                    f'{proc.returncode}: '
                    f'{proc.stderr.decode(errors="replace").strip()}',
                    file=sys.stderr,
                )
                continue
            results.extend(
                parse_prototype(location, comment) for comment in comments
            )
    print(f'parsed {len(results)} functions')
    with open(version + '.json', 'w') as f:
        json.dump(results, f)
# Build one prototype database per PHP source tree, in this order.
for php_version, source_root in (
    ('7.4.5', '/Users/frankli/.phpbrew/build/php-7.4.5/'),
    ('7.0.9', '/Users/frankli/.phpbrew/build/php-7.0.9/'),
):
    create_db(php_version, source_root)
#include "json.hpp" // https://github.com/nlohmann/json | |
#include <iostream> | |
#include <string> | |
#include <unordered_map> | |
#include <vector> | |
using namespace std; | |
using namespace nlohmann; | |
// Scanner states used while walking a source file.
enum state {
    null = 0,       // not inside a block comment
    comment = 1,    // between an opening "/*" and its closing "*/"
    identifier = 2  // declared but not referenced by the code in this file
};
// Parses the text of a "{{{ proto" comment into a JSON object.
//
// The result always carries meta.proto (the raw text). The text before the
// first '(' is split into "returns" (up to the first space), "class" (up to
// "::") and "name"; the text between the outermost parentheses becomes
// "params"; whatever follows the closing ')' becomes "desc" with leading
// whitespace removed.
//
// When the parentheses never balance, one repair is applied to the first
// line (append ")" if it already has a '(', otherwise append "()") and the
// repaired text is parsed again.
json parse(const string &proto) {
    json ret = {{"meta", {{"proto", proto}}}};
    const size_t len = proto.size();
    size_t i = 0, open_pos = 0;
    int depth = 0;
    bool closed = false;
    for (; i < len; i++) {
        const char c = proto[i];
        if (c == '(') {
            if (depth == 0) {
                string name = proto.substr(0, i);
                size_t tok = name.find(' ');
                if (tok != string::npos) {
                    ret["returns"] = name.substr(0, tok);
                    name = name.substr(tok + 1);
                }
                tok = name.find("::");
                if (tok != string::npos) {
                    ret["class"] = name.substr(0, tok);
                    name = name.substr(tok + 2);
                }
                ret["name"] = name;
                // Remember only the outermost '(' so that nested parentheses
                // (e.g. a default value "array()") stay inside "params".
                open_pos = i;
            }
            depth++;
        } else if (c == ')' && depth > 0) {
            // A ')' with nothing open is ignored; letting the depth go
            // negative would make the repair below recurse forever.
            if (--depth == 0) {
                ret["params"] = proto.substr(open_pos + 1, i - open_pos - 1);
                closed = true;
                break;
            }
        }
    }
    if (!closed) {
        size_t lb = proto.find('\n');
        if (lb == string::npos)
            lb = len;
        const string first = proto.substr(0, lb);
        const string patch = first.find('(') != string::npos ? ")" : "()";
        return parse(first + patch + proto.substr(lb));
    }
    string desc = proto.substr(i + 1);
    desc.erase(0, desc.find_first_not_of(" \t\n\r\f\v"));
    ret["desc"] = desc;
    return ret;
}
// Scans the text of a source file for block comments and returns one parsed
// prototype (see parse) for every comment that contains " {{{ proto ".
vector<json> extract(const string &file) {
    static const string tag = " {{{ proto ";
    vector<json> ret;
    state scan = null;
    size_t mark_pos = 0;
    // Both delimiters are two characters long, so nothing can match at index
    // 0; starting at 1 also keeps file[i - 1] inside the string.
    for (size_t i = 1; i < file.size(); i++) {
        const char current = file[i], last = file[i - 1];
        if (current == '*' && last == '/') {
            // Skip the character after "/*" so that "/*/" is not mistaken
            // for an already closed comment.
            mark_pos = i++;
            scan = comment;
        } else if (current == '/' && last == '*' && scan == comment) {
            const string body = file.substr(mark_pos + 1, i - mark_pos - 2);
            const size_t at = body.find(tag);
            // Start right after the tag wherever it was found, not at a
            // fixed offset from the beginning of the comment.
            if (at != string::npos)
                ret.emplace_back(parse(body.substr(at + tag.size())));
            scan = null;
        }
    }
    return ret;
}
int main(int argc, char **argv) { | |
if (argc < 2) | |
return 0; | |
auto file = fopen(argv[1], "r"); | |
char buf[2048]; | |
string proto; | |
int len; | |
while ((len = fread(buf, sizeof(char), 2048, file))) { | |
buf[len] = 0; | |
proto += buf; | |
} | |
fclose(file); | |
auto result = extract(proto); | |
cout << result; | |
} |