Skip to content

Instantly share code, notes, and snippets.

@thangarajan8
Created October 25, 2021 09:23
Show Gist options
  • Save thangarajan8/f7d541e90796d45e99e8f6df44b50fb9 to your computer and use it in GitHub Desktop.
Save thangarajan8/f7d541e90796d45e99e8f6df44b50fb9 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import json
# Java source file that the rest of this script analyses.
f_path = "HomePage.java"

# Keep the complete file text as a single string; it is handed to the parser
# further down in one piece.
with open(f_path, mode="r") as source_file:
    content = source_file.read()
def flattern_json(d):
    """Flatten a nested dict breadth-first into a single-level dict.

    Keys of nested dicts are joined to their parent key with ``"_"``.
    A list value is split in two: its non-dict items are kept together as a
    list under the list's own key, while each dict item is flattened further
    using that same key as the parent prefix (so dict items of one list share
    a prefix and later ones overwrite earlier ones on key clashes).

    A list that contributes no non-dict items (empty, or dicts only) produces
    no entry of its own, at every nesting level.

    Args:
        d: The (possibly nested) dict to flatten.

    Returns:
        A new flat dict; ``{}`` when ``d`` is empty.
    """
    if not d:
        return {}
    from collections import deque

    q = deque()  # pending (parent_key, nested_dict) pairs, processed FIFO
    res = {}

    def _visit(name, val):
        """Store a leaf under ``name`` or queue nested dicts for later."""
        if isinstance(val, dict):
            q.append((name, val))
        elif isinstance(val, list):
            plain = []
            for item in val:
                if isinstance(item, dict):
                    q.append((name, item))
                else:
                    plain.append(item)
            # Only record the list when it actually holds plain values;
            # otherwise a list of dicts would leave a stray empty list behind.
            if plain:
                res[name] = plain
        else:
            res[name] = val

    for key, val in d.items():
        _visit(key, val)
    while q:
        parent, nested = q.popleft()
        for key, val in nested.items():
            _visit(parent + "_" + key, val)
    return res
def flatten_json(y):
    """Collapse nested dicts/lists into one dict with ``_``-joined keys.

    Dict keys and list positions along the path to each leaf are concatenated
    with ``"_"`` (for example ``{"a": {"b": [7]}}`` gives ``{"a_b_0": 7}``).
    Empty dicts and empty lists contribute nothing, and a bare scalar input is
    stored under the empty-string key.
    """

    def _leaves(node, prefix):
        # Exact type checks on purpose: only plain dict/list are descended into.
        if type(node) is dict:
            for key in node:
                yield from _leaves(node[key], prefix + key + '_')
        elif type(node) is list:
            for position, element in enumerate(node):
                yield from _leaves(element, prefix + str(position) + '_')
        else:
            # Drop the trailing separator added by the parent level.
            yield prefix[:-1], node

    return dict(_leaves(y, ''))
def json_ast_encoder(o):
    """Fallback serializer intended for the ``default=`` hook of ``json.dumps``.

    Args:
        o: An object the JSON encoder could not serialize natively.

    Returns:
        * a list for a set/frozenset (sorted when the members are orderable,
          so the output is deterministic; an empty set gives ``[]``),
        * the instance ``__dict__`` for any object that has one,
        * ``""`` for everything else.
    """
    if isinstance(o, (set, frozenset)):
        # Keep the members instead of collapsing non-empty sets to "".
        try:
            return sorted(o)
        except TypeError:
            # Members are not mutually orderable; fall back to set order.
            return list(o)
    if hasattr(o, "__dict__"):
        return o.__dict__
    return ""
def find_end_line_number(node):
    """Return the largest line number recorded on ``node`` or its descendants.

    The whole subtree reachable through ``children`` is walked. Entries of
    ``children`` may be nodes, lists of nodes, or arbitrary other values
    (strings, sets, ``None``, ...); anything without a ``children`` attribute
    is treated as a leaf, and anything without a usable ``_position`` simply
    does not contribute a line number.

    Args:
        node: Root node; must expose ``position.line`` and ``children``.

    Returns:
        The maximum ``_position.line`` found, never less than
        ``node.position.line``.
    """
    max_line = node.position.line
    # Explicit stack instead of recursion: order is irrelevant for a maximum
    # and deep trees cannot hit the recursion limit.
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, list):
            pending.extend(current)
            continue
        position = getattr(current, '_position', None)
        if position is not None and position.line > max_line:
            max_line = position.line
        # Non-node values (str, set, None, ...) have no children to visit.
        children = getattr(current, 'children', None)
        if children:
            pending.extend(children)
    return max_line
# Second pass over the same file, this time keeping it as a list of individual
# lines (line endings included) so line ranges can be sliced out later.
with open(f_path, mode="r") as source_file:
    lines = list(source_file)
import javalang as jl
# Parse the Java source into a javalang syntax tree.
tree = jl.parse.parse(content)
#jl.tree.BlockStatement

# One record per method declaration found in the tree.
docs = []
failed = False
index = 0
for path, node in tree.filter(jl.tree.MethodDeclaration):
    if not failed:
        failed_method = None
    start_line = node.position.line
    d = {}
    try:
        # print(node.name)
        d.update(method=node.name, index=index, start_line=start_line)
        index += 1
        end_line = find_end_line_number(node)
        # NOTE(review): javalang line numbers look 1-based while `lines` is
        # 0-based, so this slice begins on the line after `start_line` --
        # confirm whether skipping the declaration line is intended.
        d['code'] = lines[start_line:end_line]
        d['end_line'] = end_line
    except Exception:
        # Best effort: remember the failure and mark the end line as unknown
        # (0) so it can be estimated afterwards from the next method's start.
        failed_method = node.name
        d['end_line'] = 0
        failed = True
        print(f"Error in {node.name}")
    docs.append(d)

df = pd.DataFrame(docs)
def get_new_end_line(index, df, lines_count, end_line):
    """Return ``end_line``, or an estimate when it is the placeholder ``0``.

    Args:
        index: Value of the ``index`` column for the row being resolved.
        df: DataFrame with ``index`` and ``start_line`` columns.
        lines_count: Value returned for the row holding the largest ``index``.
        end_line: Known end line, or ``0`` when it still has to be estimated.

    Returns:
        ``end_line`` unchanged when non-zero; otherwise ``lines_count`` for
        the row with the largest ``index``, else one less than the
        ``start_line`` of the row whose ``index`` is ``index + 1``.
    """
    if end_line != 0:
        return end_line
    if index == df['index'].max():
        return lines_count
    # First row whose index immediately follows the current one.
    following = df[df['index'] == index + 1]
    return following['start_line'].iloc[0] - 1
lines_count = len(lines)

# Resolve placeholder end lines (0) row by row; rows with a real end line are
# passed through unchanged by get_new_end_line.
df['new_end_line'] = df.apply(
    lambda row: get_new_end_line(row['index'], df, lines_count, row['end_line']),
    axis=1,
)
result = df.to_dict(orient="records")

# Round-trip the syntax tree through JSON to obtain plain dicts/lists, then
# take the body of the first type declared in the file.
v = json.dumps(tree, default=json_ast_encoder, sort_keys=True)
y = json.loads(v)['types'][0]['body']
z = json.dumps(y)

# One flattened record per body member, tabulated and progressively filtered:
# drop all-empty columns, keep object-typed columns, drop rows without a name.
docs = [flatten_json(i) for i in y]
df = pd.DataFrame(docs)
df_1 = df.dropna(how='all', axis=1)
df_2 = df_1.select_dtypes(include=['object'])
df_3 = df_2.dropna(subset=['name'])
df_json = df.to_dict(orient="records")

for x in tree.filter(jl.tree.Member):
    print(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment