Skip to content

Instantly share code, notes, and snippets.

@yonik
Created January 3, 2018 22:34
Show Gist options
  • Save yonik/9f7556f0326c5a9a55b98f944960149b to your computer and use it in GitHub Desktop.
Save yonik/9f7556f0326c5a9a55b98f944960149b to your computer and use it in GitHub Desktop.
getclasses.py
import javalang
import sys
def addextends(out, etype, val):
    """Record in ``out`` that the type named ``val`` extends ``etype``.

    ``out`` maps a parent type name to the list of names that extend it
    and is updated in place.

    ``etype`` may be:
      * a list of parent descriptors -- ``val`` is registered under each
        element (an empty list registers nothing);
      * a falsy value such as ``None`` -- ``val`` is registered under
        ``"Object"``;
      * any other object exposing a ``.name`` attribute -- ``val`` is
        registered under that name.
    """
    if isinstance(etype, list):
        for et in etype:
            addextends(out, et, val)
        return

    ename = etype.name if etype else "Object"

    # Single pre-formatted argument so the call prints the same text on
    # Python 2 and Python 3 (the double space after "Type" is the output
    # the original print statement produced).
    print("Type  %s extends %s" % (val, ename))
    out.setdefault(ename, []).append(val)
def getextends(out, javafiles):
    """Parse each Java source file and record its types' parents in ``out``.

    ``javafiles`` is an iterable of file paths. Entries starting with ``#``
    are skipped, which allows a problematic file to be commented out in the
    listing. Every other file is read, parsed with ``javalang.parse.parse``,
    and each entry of the resulting ``tree.types`` is registered through
    ``addextends`` under the value of its ``extends`` attribute (``None``
    when the attribute is absent, which ``addextends`` files under
    ``"Object"``).

    ``out`` is updated in place; nothing is returned. Errors raised while
    opening or parsing a file propagate to the caller.
    """
    for jfile in javafiles:
        if jfile.startswith('#'):
            continue
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("opening  %s" % (jfile,))
        # Close the handle promptly; the listing can name thousands of files.
        with open(jfile, 'r') as source:
            javastring = source.read()
        tree = javalang.parse.parse(javastring)
        for t in tree.types:
            # print t, t.attrs
            # addextends(out, t.extends, t)  # if we wanted to keep a mapping to the whole type
            extends = getattr(t, "extends", None)
            addextends(out, extends, t.name)
            # Interfaces are deliberately not followed; to include them:
            # extends = getattr(t, "implements", None)
            # addextends(out, extends, t.name)
def getsubs(emap, name):
    """Return every direct and transitive subtype name of ``name``.

    ``emap`` maps a parent type name to the list of names that directly
    extend it. The result lists the direct subtypes of ``name`` first,
    followed by the descendants of each direct subtype in order. A name
    reachable along several paths appears once per path. An unknown name,
    or one with no subtypes, yields an empty list.

    ``emap`` is never modified: a fresh list is built on every call, so
    repeated lookups return the same answer. A name already on the current
    lookup path is not expanded again, so a cyclic mapping (for example two
    unrelated classes sharing a simple name) terminates instead of
    recursing forever.
    """
    active = set()  # names currently being expanded on the recursion path

    def collect(current):
        direct = emap.get(current)
        if not direct or current in active:
            return []
        active.add(current)
        found = list(direct)  # copy, so the list stored in emap stays intact
        for sub in direct:
            found += collect(sub)
        active.discard(current)
        return found

    return collect(name)
# Alternative driver, left disabled: read the Java file names from stdin
# into a single map instead of the two list files used below.
#emap = {}
#
#files = []
#for line in sys.stdin:
#    files += line.split()
#
#getextends(emap, files)

# Parent-name -> subtype-names maps, one per source tree being compared.
emap4 = {}
emap7 = {}
# Smaller listings, handy for a quick trial run:
#getextends(emap4, open("small4.txt","r").read().split())
#getextends(emap7, open("small7.txt","r").read().split())

# Each listing holds whitespace-separated paths of .java files. The listing
# files are closed as soon as their contents have been processed.
with open("lusolr4files.txt", "r") as listing:
    getextends(emap4, listing.read().split())
with open("lusolr7files.txt", "r") as listing:
    getextends(emap7, listing.read().split())

# test to make sure that we're getting whole hierarchy
# emap4['ApostropheFilterFactory']=['MyApostropheFilterFactory']

# The listings were generated with:
# find /opt/code/lusolr/4x -name \*.java | grep -v '/test' > lusolr4files.txt
# find /opt/code/lusolr/70 -name \*.java | grep -v '/test' > lusolr7files.txt

# issues parsing the following file, comment it out
# (prefix its line in the listing with '#'):
# /opt/code/lusolr/70//lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java

# extends Analyzer TokenStream FieldType
classes = ["TokenizerFactory", "TokenFilterFactory", "Analyzer", "FieldType", "SimilarityFactory"]

# For each root type, report the subtype names found in the first tree
# (emap4) that are missing from the second (emap7).
for cls in classes:
    print("### Processing type %s" % (cls,))
    r4 = getsubs(emap4, cls)
    r7 = getsubs(emap7, cls)
    print(set(r4) - set(r7))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment