Skip to content

Instantly share code, notes, and snippets.

@raymyers
Created March 19, 2024 21:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raymyers/e6901d033f1717d1985d2f9592d70f2f to your computer and use it in GitHub Desktop.
Save raymyers/e6901d033f1717d1985d2f9592d70f2f to your computer and use it in GitHub Desktop.
import infomap
import littletable
import itertools
import json
from sentence_transformers import SentenceTransformer, util
# def simularity(functions):
# # Also consider clustering
# # https://github.com/UKPLab/sentence-transformers/blob/master/examples/applications/clustering/fast_clustering.py
# model = SentenceTransformer('all-MiniLM-L6-v2')
# names = [f.name for f in functions]
# paraphrases = util.paraphrase_mining(model, names)
# # print(embeddings[0:5])
# # Converting to list is a workaround for this bug in ChromaDB:
# # ValueError: The truth value of an array with more than one element is ambiguous
# for (score, i, j) in paraphrases[:len(paraphrases)//4]:
# print("{} \t\t {} \t\t Score: {:.4f}".format(functions[i].name, functions[j].name, score))
# functions[i].paraphrase_ids.append(functions[j].id)
def semantic_clustering(functions):
    """Cluster functions by the similarity of their names and record the result.

    Every function name is encoded with the 'all-MiniLM-L6-v2' sentence
    transformer and ``util.community_detection`` is run over the embeddings.
    Each member of a detected community gets its ``cluster_ids`` attribute set
    to the ids of all functions in that community (its own id included).

    Args:
        functions: indexable, iterable collection of records that expose
            ``name`` and ``id`` attributes and accept a ``cluster_ids``
            attribute.

    Returns:
        None. Records are updated in place and each cluster is printed.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = encoder.encode(
        [record.name for record in functions],
        batch_size=64,
        show_progress_bar=True,
        convert_to_tensor=True,
    )
    communities = util.community_detection(embeddings, min_community_size=5, threshold=0.50)
    for number, members in enumerate(communities, start=1):
        member_ids = []
        member_names = []
        for position in members:
            record = functions[position]
            member_ids.append(record.id)
            member_names.append(record.name)
        print("\nCluster {}, #{} Elements ".format(number, len(members)))
        print(member_names)
        # All members share one id list, which also contains their own id.
        for position in members:
            functions[position].cluster_ids = member_ids
# --- Load the function records and give each one a sequential numeric id. ---
functions = littletable.Table()
functions.json_import("tags.json")
functions.create_index("name")
# enumerate() replaces a manual counter that shadowed the built-in ``id``.
for fun_id, fun in enumerate(functions.where(), start=1):
    fun.id = fun_id
functions.create_index("id", unique=True)
# functions.add_field('paraphrase_ids', lambda f: [])
functions.add_field('cluster_ids', lambda f: [])
# simularity(functions.where())
semantic_clustering(functions.where())

# --- Build a directed graph: one node per function, links for calls and for
# --- name-similarity cluster membership.
im = infomap.Infomap(two_level=True, directed=True)
for fun in functions.where():
    im.add_node(fun.id, name=fun.name)
for fun in functions.where():
    called_fun_ids = set()
    for called_fun_name in fun.calls_functions:
        called_fun_matches = functions.by.name[called_fun_name]
        if len(called_fun_matches) > 0:
            # Names are not unique; the first record with this name is used.
            called_fun_id = called_fun_matches[0].id
            called_fun_ids.add(called_fun_id)
            # if called_fun_id in fun.paraphrase_ids:
            #     weight = 1.5
            im.add_link(fun.id, called_fun_id, weight=1.0)
    for par_id in fun.cluster_ids:
        # cluster_ids also lists the function itself; linking a function to
        # itself says nothing about similarity, so that entry is skipped.
        # Functions already linked by a call keep only the call link.
        if par_id != fun.id and par_id not in called_fun_ids:
            im.add_link(fun.id, par_id, weight=0.25)

# --- Detect modules and report them. ---
im.run()
im.write_json("modules-tree.json")
print(f"Found {im.num_top_modules} modules with codelength: {im.codelength}")
print("Result")
print("\n# module")
# Walk the tree once; the (function, module id) pairs feed both the console
# listing and the JSON export.
fun_module_pairs = [
    (functions.by.id[node.node_id], node.module_id)
    for node in im.tree
    if node.is_leaf
]
prev_mod = None
for fun, module_id in fun_module_pairs:
    if module_id != prev_mod:
        # Blank line between consecutive modules.
        print()
    print(f'{fun.name} in {fun.file}')  # , module_id
    prev_mod = module_id
# groupby only merges adjacent items, so this relies on the tree yielding the
# leaves of one module consecutively (the listing above assumes the same).
grouped_fun_module_pairs = itertools.groupby(fun_module_pairs, lambda n: n[1])
data = [
    [{'name': fun.name, 'file': fun.file} for (fun, _) in fun_mod_pairs]
    for (_, fun_mod_pairs) in grouped_fun_module_pairs
]
with open("modules.json", "w") as outfile:
    json.dump(data, outfile, indent=2)
import json
import openai
# Accumulates the entries of groups that are too small to be named.
misc = []
# Each element of ``modules`` is one group: a list of entries with
# 'name' and 'file' keys.
with open('modules.json') as modules_file:
    modules = json.load(modules_file)
class GptClient:
    """Small wrapper that sends a single user prompt to an OpenAI chat model.

    Attributes:
        model_name: model identifier passed to the chat-completion call.
        dry_run: when true, no request is made and a fixed dummy string is
            returned instead.
        system_prompt: content of the system message sent with every request.
        temperature: sampling temperature passed to the chat-completion call.
    """

    def __init__(
        self,
        model_name,
        dry_run: bool = False,
        *,
        system_prompt: str = "You are a helpful assistant.",
        temperature: float = 0,
    ):
        """Store the request settings; no network access happens here."""
        self.model_name = model_name
        self.dry_run = dry_run
        self.system_prompt = system_prompt
        self.temperature = temperature

    def fetch_completion(self, user_prompt):
        """Return the model's reply to ``user_prompt`` as a string.

        Args:
            user_prompt: text sent as the user message.

        Returns:
            The content of the first choice in the response, or the literal
            "Dummy dry run response" when ``dry_run`` is set.
        """
        if self.dry_run:
            return "Dummy dry run response"
        # NOTE(review): ``openai.ChatCompletion`` is the legacy (pre-1.0)
        # interface of the openai package — confirm the pinned version.
        response = openai.ChatCompletion.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=self.temperature,
        )
        return response["choices"][0]["message"]["content"]
# Ask the model to name every group of more than two functions; smaller
# groups are pooled and listed at the end without a name.
gpt_client = GptClient(model_name="gpt-4")
for module in modules:
    if len(module) <= 2:
        misc.extend(module)
        continue
    listing = "\n".join(entry['name'] for entry in module)
    listing_with_file = "\n".join(
        f"{entry['name']} in {entry['file']}" for entry in module
    )
    # Only the bare names go into the prompt; file paths are printed locally.
    prompt = (
        "We have grouped these functions into a module, please give the module's name and description.\n"
        f"{listing}\n"
        "Respond in the form:\n"
        "# NAME\n"
        "Description"
    )
    print(gpt_client.fetch_completion(prompt))
    print()
    print(listing_with_file)
    print()
print("MISC (no module):")
print("\n".join(f"{entry['name']} in {entry['file']}" for entry in misc))
[
[
{
"name": "kread",
"file": "SRC/CACHE1D.C"
},
{
"name": "krand",
"file": "SRC/ENGINE.C"
},
{
"name": "kclose",
"file": "SRC/CACHE1D.C"
},
{
"name": "kfree",
"file": "SRC/ENGINE.C"
},
{
"name": "klseek",
"file": "SRC/CACHE1D.C"
},
{
"name": "kdfread",
"file": "SRC/CACHE1D.C"
},
{
"name": "krecip",
"file": "SRC/ENGINE.C"
},
{
"name": "uncompress",
"file": "SRC/CACHE1D.C"
}
],
[
{
"name": "getzsofslope",
"file": "SRC/ENGINE.C"
},
{
"name": "getflorzofslope",
"file": "SRC/ENGINE.C"
},
{
"name": "getceilzofslope",
"file": "SRC/ENGINE.C"
},
{
"name": "getcrc",
"file": "SRC/MULTI.C"
},
{
"name": "getzrange",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "drawline16",
"file": "SRC/ENGINE.C"
},
{
"name": "spritewallfront",
"file": "SRC/ENGINE.C"
},
{
"name": "maskwallscan",
"file": "SRC/ENGINE.C"
},
{
"name": "transmaskwallscan",
"file": "SRC/ENGINE.C"
},
{
"name": "prepwall",
"file": "SRC/ENGINE.C"
},
{
"name": "wallscan",
"file": "SRC/ENGINE.C"
},
{
"name": "lastwall",
"file": "SRC/ENGINE.C"
},
{
"name": "drawmaskwall",
"file": "SRC/ENGINE.C"
},
{
"name": "drawscreen",
"file": "SRC/GAME.C"
},
{
"name": "drawline256",
"file": "SRC/ENGINE.C"
},
{
"name": "drawmasks",
"file": "SRC/ENGINE.C"
},
{
"name": "sectorofwall",
"file": "SRC/ENGINE.C"
},
{
"name": "setfirstwall",
"file": "SRC/ENGINE.C"
},
{
"name": "loopnumofsector",
"file": "SRC/ENGINE.C"
},
{
"name": "printext256",
"file": "SRC/ENGINE.C"
},
{
"name": "draw2dgrid",
"file": "SRC/ENGINE.C"
},
{
"name": "analyzesprites",
"file": "SRC/GAME.C"
},
{
"name": "movelava",
"file": "SRC/GAME.C"
},
{
"name": "drawoverheadmap",
"file": "SRC/GAME.C"
},
{
"name": "dointerpolations",
"file": "SRC/GAME.C"
},
{
"name": "restoreinterpolations",
"file": "SRC/GAME.C"
},
{
"name": "dragpoint",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "deletespritestat",
"file": "SRC/ENGINE.C"
},
{
"name": "deletespritesect",
"file": "SRC/ENGINE.C"
},
{
"name": "insertspritestat",
"file": "SRC/ENGINE.C"
},
{
"name": "changespritestat",
"file": "SRC/ENGINE.C"
},
{
"name": "deletesprite",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "faketimerhandler",
"file": "SRC/GAME.C"
},
{
"name": "sendpacket",
"file": "SRC/MULTI.C"
},
{
"name": "startcom",
"file": "SRC/MULTI.C"
},
{
"name": "movethings",
"file": "SRC/GAME.C"
},
{
"name": "getpackets",
"file": "SRC/GAME.C"
},
{
"name": "getinput",
"file": "SRC/GAME.C"
},
{
"name": "getoutputcirclesize",
"file": "SRC/MULTI.C"
},
{
"name": "comsend",
"file": "SRC/MULTI.C"
},
{
"name": "getpacket",
"file": "SRC/MULTI.C"
},
{
"name": "getmousevalues",
"file": "SRC/ENGINE.C"
},
{
"name": "processreservedmessage",
"file": "SRC/MULTI.C"
},
{
"name": "sendlogoff",
"file": "SRC/MULTI.C"
},
{
"name": "fillpolygon",
"file": "SRC/ENGINE.C"
},
{
"name": "netuninitconnection",
"file": "SRC/MULTI.C"
},
{
"name": "completemirror",
"file": "SRC/ENGINE.C"
},
{
"name": "sendlogon",
"file": "SRC/MULTI.C"
},
{
"name": "waitforeverybody",
"file": "SRC/GAME.C"
}
],
[
{
"name": "clipinsideboxline",
"file": "SRC/ENGINE.C"
},
{
"name": "clipmove",
"file": "SRC/ENGINE.C"
},
{
"name": "pushmove",
"file": "SRC/ENGINE.C"
},
{
"name": "clipinsidebox",
"file": "SRC/ENGINE.C"
},
{
"name": "clippoly4",
"file": "SRC/ENGINE.C"
},
{
"name": "clippoly",
"file": "SRC/ENGINE.C"
},
{
"name": "rintersect",
"file": "SRC/ENGINE.C"
},
{
"name": "keepaway",
"file": "SRC/ENGINE.C"
},
{
"name": "fakedomovethings",
"file": "SRC/GAME.C"
},
{
"name": "fakedomovethingscorrect",
"file": "SRC/GAME.C"
}
],
[
{
"name": "setaspect",
"file": "SRC/ENGINE.C"
},
{
"name": "changespritesect",
"file": "SRC/ENGINE.C"
},
{
"name": "setstereo",
"file": "SRC/ENGINE.C"
},
{
"name": "movesprite",
"file": "SRC/GAME.C"
},
{
"name": "setsprite",
"file": "SRC/ENGINE.C"
},
{
"name": "initspritelists",
"file": "SRC/ENGINE.C"
},
{
"name": "setears",
"file": "SRC/KDMENG.C"
},
{
"name": "setsocket",
"file": "SRC/MULTI.C"
},
{
"name": "insertspritesect",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "uninstallbikdmhandlers",
"file": "SRC/KDMENG.C"
},
{
"name": "installbistereohandlers",
"file": "SRC/ENGINE.C"
},
{
"name": "uninstallbistereohandlers",
"file": "SRC/ENGINE.C"
},
{
"name": "installbikdmhandlers",
"file": "SRC/KDMENG.C"
},
{
"name": "uninstallbicomhandlers",
"file": "SRC/MULTI.C"
},
{
"name": "installbicomhandlers",
"file": "SRC/MULTI.C"
},
{
"name": "convalloc32",
"file": "SRC/MULTI.C"
},
{
"name": "kdmconvalloc32",
"file": "SRC/KDMENG.C"
},
{
"name": "comon",
"file": "SRC/MULTI.C"
}
],
[
{
"name": "loadtile",
"file": "SRC/ENGINE.C"
},
{
"name": "kopen4load",
"file": "SRC/CACHE1D.C"
},
{
"name": "loadpalette",
"file": "SRC/ENGINE.C"
},
{
"name": "setgamemode",
"file": "SRC/ENGINE.C"
},
{
"name": "loadwaves",
"file": "SRC/KDMENG.C"
},
{
"name": "loadboard",
"file": "SRC/ENGINE.C"
},
{
"name": "loadpics",
"file": "SRC/ENGINE.C"
},
{
"name": "loadgame",
"file": "SRC/GAME.C"
},
{
"name": "savegame",
"file": "SRC/GAME.C"
},
{
"name": "loadsong",
"file": "SRC/KDMENG.C"
},
{
"name": "loadvoxel",
"file": "SRC/ENGINE.C"
},
{
"name": "setbrightness",
"file": "SRC/ENGINE.C"
},
{
"name": "searchmap",
"file": "SRC/GAME.C"
},
{
"name": "initfastcolorlookup",
"file": "SRC/ENGINE.C"
},
{
"name": "suckcache",
"file": "SRC/CACHE1D.C"
},
{
"name": "insertsprite",
"file": "SRC/ENGINE.C"
},
{
"name": "copytilepiece",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "animateoffs",
"file": "SRC/ENGINE.C"
},
{
"name": "rotatesprite",
"file": "SRC/ENGINE.C"
},
{
"name": "wsayfollow",
"file": "SRC/KDMENG.C"
},
{
"name": "printext",
"file": "SRC/GAME.C"
},
{
"name": "dorotatesprite",
"file": "SRC/ENGINE.C"
},
{
"name": "changehealth",
"file": "SRC/GAME.C"
},
{
"name": "drawtilebackground",
"file": "SRC/GAME.C"
},
{
"name": "changenumbombs",
"file": "SRC/GAME.C"
},
{
"name": "changenummissiles",
"file": "SRC/GAME.C"
},
{
"name": "changenumgrabbers",
"file": "SRC/GAME.C"
},
{
"name": "checktouchsprite",
"file": "SRC/GAME.C"
},
{
"name": "checkgrabbertouchsprite",
"file": "SRC/GAME.C"
}
],
[
{
"name": "updatesector",
"file": "SRC/ENGINE.C"
},
{
"name": "inside",
"file": "SRC/ENGINE.C"
},
{
"name": "scansector",
"file": "SRC/ENGINE.C"
},
{
"name": "raytrace",
"file": "SRC/ENGINE.C"
},
{
"name": "operatesector",
"file": "SRC/GAME.C"
},
{
"name": "updatesectorz",
"file": "SRC/GAME.C"
},
{
"name": "setinterpolation",
"file": "SRC/GAME.C"
},
{
"name": "nextsectorneighborz",
"file": "SRC/ENGINE.C"
},
{
"name": "getanimationgoal",
"file": "SRC/GAME.C"
},
{
"name": "setanimation",
"file": "SRC/GAME.C"
}
],
[
{
"name": "drawstatusbar",
"file": "SRC/GAME.C"
},
{
"name": "drawstatusflytime",
"file": "SRC/GAME.C"
},
{
"name": "drawsprite",
"file": "SRC/ENGINE.C"
},
{
"name": "printscreeninterrupt",
"file": "SRC/ENGINE.C"
},
{
"name": "drawvox",
"file": "SRC/ENGINE.C"
},
{
"name": "drawrooms",
"file": "SRC/ENGINE.C"
},
{
"name": "screencapture",
"file": "SRC/ENGINE.C"
},
{
"name": "drawmapview",
"file": "SRC/ENGINE.C"
},
{
"name": "drawalls",
"file": "SRC/ENGINE.C"
},
{
"name": "draw2dscreen",
"file": "SRC/ENGINE.C"
},
{
"name": "readpixel16",
"file": "SRC/ENGINE.C"
},
{
"name": "clear2dscreen",
"file": "SRC/ENGINE.C"
},
{
"name": "ceilspritescan",
"file": "SRC/ENGINE.C"
},
{
"name": "dosetaspect",
"file": "SRC/ENGINE.C"
},
{
"name": "grouscan",
"file": "SRC/ENGINE.C"
},
{
"name": "parascan",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "uninitsb",
"file": "SRC/KDMENG.C"
},
{
"name": "initcache",
"file": "SRC/CACHE1D.C"
},
{
"name": "getsbset",
"file": "SRC/KDMENG.C"
},
{
"name": "initcrc",
"file": "SRC/MULTI.C"
},
{
"name": "initengine",
"file": "SRC/ENGINE.C"
},
{
"name": "initlava",
"file": "SRC/GAME.C"
},
{
"name": "inittimer",
"file": "SRC/GAME.C"
},
{
"name": "initsb",
"file": "SRC/KDMENG.C"
},
{
"name": "ksqrt",
"file": "SRC/ENGINE.C"
},
{
"name": "loadtables",
"file": "SRC/ENGINE.C"
},
{
"name": "initksqrt",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "uninitkeys",
"file": "SRC/GAME.C"
},
{
"name": "initkeys",
"file": "SRC/GAME.C"
},
{
"name": "stereohandler1",
"file": "SRC/ENGINE.C"
},
{
"name": "timerhandler",
"file": "SRC/GAME.C"
},
{
"name": "keyhandler",
"file": "SRC/GAME.C"
},
{
"name": "comhandler",
"file": "SRC/MULTI.C"
},
{
"name": "sbhandler",
"file": "SRC/KDMENG.C"
}
],
[
{
"name": "simulateint",
"file": "SRC/MULTI.C"
},
{
"name": "netsend",
"file": "SRC/MULTI.C"
},
{
"name": "netinitconnection",
"file": "SRC/MULTI.C"
},
{
"name": "netoff",
"file": "SRC/MULTI.C"
},
{
"name": "neton",
"file": "SRC/MULTI.C"
},
{
"name": "musicon",
"file": "SRC/KDMENG.C"
}
],
[
{
"name": "allocache",
"file": "SRC/CACHE1D.C"
},
{
"name": "reportandexit",
"file": "SRC/CACHE1D.C"
},
{
"name": "dfwrite",
"file": "SRC/CACHE1D.C"
},
{
"name": "compress",
"file": "SRC/CACHE1D.C"
},
{
"name": "allocatepermanenttile",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "setview",
"file": "SRC/ENGINE.C"
},
{
"name": "clearallviews",
"file": "SRC/ENGINE.C"
},
{
"name": "clearview",
"file": "SRC/ENGINE.C"
},
{
"name": "setviewtotile",
"file": "SRC/ENGINE.C"
},
{
"name": "setviewback",
"file": "SRC/ENGINE.C"
},
{
"name": "view",
"file": "SRC/GAME.C"
}
],
[
{
"name": "getpalookup",
"file": "SRC/ENGINE.C"
},
{
"name": "transmaskvline",
"file": "SRC/ENGINE.C"
},
{
"name": "hline",
"file": "SRC/ENGINE.C"
},
{
"name": "slowhline",
"file": "SRC/ENGINE.C"
},
{
"name": "transmaskvline2",
"file": "SRC/ENGINE.C"
},
{
"name": "ceilspritehline",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "wallfront",
"file": "SRC/ENGINE.C"
},
{
"name": "wallmost",
"file": "SRC/ENGINE.C"
},
{
"name": "hitscan",
"file": "SRC/ENGINE.C"
},
{
"name": "ceilscan",
"file": "SRC/ENGINE.C"
},
{
"name": "florscan",
"file": "SRC/ENGINE.C"
},
{
"name": "owallmost",
"file": "SRC/ENGINE.C"
},
{
"name": "bunchfront",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "uninitengine",
"file": "SRC/ENGINE.C"
},
{
"name": "uninittimer",
"file": "SRC/GAME.C"
},
{
"name": "uninitgroupfile",
"file": "SRC/CACHE1D.C"
},
{
"name": "uninitmultiplayers",
"file": "SRC/MULTI.C"
},
{
"name": "comoff",
"file": "SRC/MULTI.C"
},
{
"name": "setup3dscreen",
"file": "SRC/GAME.C"
},
{
"name": "prepareboard",
"file": "SRC/GAME.C"
},
{
"name": "initmouse",
"file": "SRC/ENGINE.C"
},
{
"name": "playback",
"file": "SRC/GAME.C"
},
{
"name": "initgroupfile",
"file": "SRC/CACHE1D.C"
},
{
"name": "initmultiplayers",
"file": "SRC/MULTI.C"
},
{
"name": "main",
"file": "SRC/GAME.C"
}
],
[
{
"name": "startwave",
"file": "SRC/KDMENG.C"
},
{
"name": "preparesndbuf",
"file": "SRC/KDMENG.C"
},
{
"name": "wsay",
"file": "SRC/KDMENG.C"
}
],
[
{
"name": "stopinterpolation",
"file": "SRC/GAME.C"
},
{
"name": "cansee",
"file": "SRC/ENGINE.C"
},
{
"name": "warpsprite",
"file": "SRC/GAME.C"
},
{
"name": "domovethings",
"file": "SRC/GAME.C"
},
{
"name": "warp",
"file": "SRC/GAME.C"
},
{
"name": "statuslistcode",
"file": "SRC/GAME.C"
},
{
"name": "tagcode",
"file": "SRC/GAME.C"
},
{
"name": "checkmasterslaveswitch",
"file": "SRC/GAME.C"
},
{
"name": "updateinterpolations",
"file": "SRC/GAME.C"
},
{
"name": "doanimations",
"file": "SRC/GAME.C"
},
{
"name": "rotatepoint",
"file": "SRC/ENGINE.C"
},
{
"name": "testneighborsectors",
"file": "SRC/GAME.C"
},
{
"name": "bombexplode",
"file": "SRC/GAME.C"
}
],
[
{
"name": "makepalookup",
"file": "SRC/ENGINE.C"
},
{
"name": "initplayersprite",
"file": "SRC/GAME.C"
},
{
"name": "getclosestcol",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "getangle",
"file": "SRC/ENGINE.C"
},
{
"name": "preparemirror",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "shootgun",
"file": "SRC/GAME.C"
},
{
"name": "activatehitag",
"file": "SRC/GAME.C"
},
{
"name": "operatesprite",
"file": "SRC/GAME.C"
},
{
"name": "processinput",
"file": "SRC/GAME.C"
},
{
"name": "findrandomspot",
"file": "SRC/GAME.C"
}
],
[
{
"name": "musicoff",
"file": "SRC/KDMENG.C"
}
],
[
{
"name": "nextpage",
"file": "SRC/ENGINE.C"
},
{
"name": "stereonextpage",
"file": "SRC/ENGINE.C"
},
{
"name": "agecache",
"file": "SRC/CACHE1D.C"
}
],
[
{
"name": "lintersect",
"file": "SRC/ENGINE.C"
},
{
"name": "neartag",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "flushperms",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "kfilelength",
"file": "SRC/CACHE1D.C"
},
{
"name": "qloadkvx",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "printext16",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "plotpixel",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "getpixel",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "squarerotatetile",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "alignceilslope",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "alignflorslope",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "qsetmode640350",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "stereohandler2",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "dfread",
"file": "SRC/CACHE1D.C"
}
],
[
{
"name": "saveboard",
"file": "SRC/ENGINE.C"
}
],
[
{
"name": "getsndbufinfo",
"file": "SRC/KDMENG.C"
}
],
[
{
"name": "qsetmode640480",
"file": "SRC/ENGINE.C"
}
]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment