Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save frankli0324/a754fc9ca60de47726ba976952b566ba to your computer and use it in GitHub Desktop.
import os
import re
import json
import subprocess
from tokenizer import tokenize
# Compile parser.cpp into the ./parser executable that create_db() runs on
# every source file. The exit status of the compiler is not checked here.
os.system('g++ --std=c++11 parser.cpp -o parser -O2')
def _declared_words(text):
    """Split one parameter declaration into its words, ignoring any default.

    Everything from the first ``=`` onwards is dropped, and words that consist
    solely of ``[`` / ``]`` characters (left behind when an optional group
    could not be removed) are skipped.
    """
    declaration = text.partition('=')[0]
    return [word for word in declaration.split() if word.strip('[]')]


def parse_prototype(loc, info):
    """Expand the raw parameter string of one prototype into a list of dicts.

    Args:
        loc: Source location, stored under ``info['meta']['location']``.
        info: Mapping describing one prototype. It must contain a ``'meta'``
            mapping and a ``'params'`` string such as
            ``'string str [, int flags = 0]'``. It is modified in place.

    Returns:
        The same ``info`` object with ``info['params']`` replaced by a list of
        dicts, one per parameter. Each dict has ``'optional'`` and ``'name'``
        and, when the prototype provides them, ``'default'`` and ``'type'``
        (the first word of the declaration).
    """
    info['meta']['location'] = loc
    params = info['params']
    plist = []
    if params != '' and params != 'void':
        # Pass 1: ignore the optional-group brackets and record every
        # parameter, treating it as optional until pass 2 says otherwise.
        for piece in params.replace('[', '').replace(']', '').split(','):
            piece = piece.strip()
            if not piece:
                continue
            entry = {'optional': True}
            _, has_default, default = piece.partition('=')
            if has_default:
                entry['default'] = default.strip()
            words = _declared_words(piece)
            if len(words) > 1:
                entry['type'] = words[0]
            entry['name'] = words[-1] if words else ''
            plist.append(entry)
        # Pass 2: delete the bracketed optional groups until nothing changes;
        # whatever declarations survive are the required parameters.
        while True:
            before = len(params)
            params = re.sub(
                r'\[[ ,]*(\$?[a-z][ a-z0-9_.]* ?=?(.*)?)\]', '', params,
                flags=re.IGNORECASE
            ).strip()
            if len(params) == before:
                break
        for piece in params.split(','):
            words = _declared_words(piece)
            if not words:
                continue
            # Use the same "last word is the name" rule as pass 1 so untyped
            # and multi-word-typed parameters are matched as well.
            for entry in plist:
                if entry['name'] == words[-1]:
                    entry['optional'] = False
    info['params'] = plist
    return info
def create_db(version, directory):
    """Run ./parser over the C sources under ``directory`` and save the result.

    Every file below ``directory`` whose name ends in ``.c`` and does not
    contain ``test`` is passed to the ``./parser`` executable (resolved
    relative to the current working directory). The parser's standard output
    is decoded as a JSON list, each item is handed to ``parse_prototype``
    together with the file's path relative to ``directory``, and the combined
    list is written to ``<version>.json``.

    Files for which the parser exits with a non-zero status, or prints
    something that is not valid JSON, are reported on stderr and skipped so
    that one bad file does not abort the whole run.

    Args:
        version: Prefix of the output file name (``version + '.json'``).
        directory: Root of the source tree to scan.
    """
    import sys

    results = []
    for path, _dirs, files in os.walk(directory, topdown=True):
        for filename in files:
            if not filename.endswith('.c') or 'test' in filename:
                continue
            source = os.path.join(path, filename)
            # relpath yields the same location whether or not `directory`
            # ends with a path separator.
            location = os.path.relpath(source, directory)
            proc = subprocess.run(
                ['./parser', source],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            if proc.returncode != 0:
                print(f'skipping {location}: parser exited with status '
                      f'{proc.returncode}', file=sys.stderr)
                continue
            try:
                comments = json.loads(proc.stdout)
            except ValueError as exc:
                print(f'skipping {location}: unreadable parser output ({exc})',
                      file=sys.stderr)
                continue
            for comment in comments:
                results.append(parse_prototype(location, comment))
    print(f'parsed {len(results)} functions')
    with open(version + '.json', 'w') as f:
        f.write(json.dumps(results))
# Script entry: write 7.4.5.json and 7.0.9.json from two PHP source trees.
# The paths point into one user's phpbrew build directory and must be edited
# before running on another machine.
create_db('7.4.5', '/Users/frankli/.phpbrew/build/php-7.4.5/')
create_db('7.0.9', '/Users/frankli/.phpbrew/build/php-7.0.9/')
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <vector>

#include "json.hpp" // https://github.com/nlohmann/json
using namespace std;
using namespace nlohmann;
// Scanner states for walking a source file.
// NOTE(review): `identifier` is not referenced by any code visible in this file.
enum state {
    null = 0,       // outside any comment
    comment = 1,    // inside a /* ... */ block comment
    identifier = 2
};
/**
 * Parse one prototype string, e.g. "string Foo::bar(int a [, int b])\n   Text".
 *
 * The returned object contains:
 *   - "meta"."proto": the text that was parsed (the repaired text if the
 *                     input had to be repaired, see below);
 *   - "returns":      the part before the first space of the text preceding
 *                     the outermost '(' (only if there is a space);
 *   - "class":        the part before "::" in the remaining name (only if
 *                     "::" is present);
 *   - "name":         what is left of the name;
 *   - "params":       the raw text between the outermost parentheses;
 *   - "desc":         everything after the closing ')', with leading
 *                     whitespace removed.
 *
 * If the outermost parenthesis is never closed, a ')' (or "()" when the first
 * line contains no '(' at all) is appended to the first line and the result
 * is parsed again.
 *
 * @param proto  Prototype text taken from a source comment.
 * @return JSON object as described above.
 */
json parse(const std::string &proto) {
    json ret = {{"meta", {{"proto", proto}}}};
    const std::size_t npos = std::string::npos;
    std::size_t open_pos = 0;     // index of the outermost '('
    std::size_t close_pos = npos; // index of the ')' that matches it
    std::size_t depth = 0;
    for (std::size_t i = 0; i < proto.size(); ++i) {
        const char c = proto[i];
        if (c == '(') {
            if (depth == 0) {
                // Remember only the outermost '(' so nested parentheses
                // (e.g. "array(1, 2)" in a default) stay inside "params".
                open_pos = i;
                std::string name = proto.substr(0, i);
                std::size_t tok = name.find(' ');
                if (tok != npos) {
                    ret["returns"] = name.substr(0, tok);
                    name = name.substr(tok + 1);
                }
                tok = name.find("::");
                if (tok != npos) {
                    ret["class"] = name.substr(0, tok);
                    name = name.substr(tok + 2);
                }
                ret["name"] = name;
            }
            ++depth;
        } else if (c == ')' && depth > 0) {
            // A ')' with nothing open is ignored; counting it would keep the
            // depth from ever returning to zero and make the repair below
            // recurse without end.
            if (--depth == 0) {
                ret["params"] = proto.substr(open_pos + 1, i - open_pos - 1);
                close_pos = i;
                break;
            }
        }
    }
    if (close_pos == npos) {
        // Unbalanced prototype: close it on its first line and retry. Each
        // retry adds one ')' so the recursion ends once the depth balances.
        std::size_t lb = proto.find('\n');
        if (lb == npos)
            lb = proto.size();
        const std::string first = proto.substr(0, lb);
        const std::string rest = proto.substr(lb);
        if (first.find('(') != npos)
            return parse(first + ')' + rest);
        return parse(first + "()" + rest);
    }
    std::string desc = proto.substr(close_pos + 1);
    desc.erase(0, desc.find_first_not_of(" \t\n\r\f\v"));
    ret["desc"] = desc;
    return ret;
}
vector<json> extract(const string &file) {
int mark_pos = 0;
state state = null;
vector<json> ret;
for (int i = 0; i < file.size(); i++) {
char current = file[i], last = file[i - 1];
switch (current) {
case '*':
if (last == '/') {
mark_pos = i++;
state = comment;
}
break;
case '/':
if (state == comment && last == '*') {
string comment = file.substr(mark_pos + 1, i - mark_pos - 2);
if (comment.find(" {{{ proto ") != -1)
ret.emplace_back(parse(
comment.substr(11)));
state = null;
}
break;
}
}
return ret;
}
/**
 * Usage: parser <source-file>
 *
 * Reads the whole file, extracts its documented prototypes and prints them to
 * standard output. Exits with 0 and prints nothing when no file is given;
 * exits with 1 and reports on standard error when the file cannot be opened.
 */
int main(int argc, char **argv) {
    if (argc < 2)
        return 0;
    std::ifstream input(argv[1]);
    if (!input) {
        std::cerr << "parser: cannot open " << argv[1] << '\n';
        return 1;
    }
    // Read through stream iterators: no fixed-size buffer to overrun and
    // embedded NUL bytes are kept.
    const std::string source((std::istreambuf_iterator<char>(input)),
                             std::istreambuf_iterator<char>());
    auto result = extract(source);
    std::cout << result;
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment