# Source: GitHub gist thangarajan8/f7d541e90796d45e99e8f6df44b50fb9
# (created October 25, 2021 09:23).
import pandas as pd
import numpy as np
import json
# Path of the Java source file to analyse. It is empty here and must be set
# before the script is run, otherwise open() fails.
f_path = ""
with open(f_path, 'r') as f:
    # NOTE(review): the right-hand side of this assignment was cut off in the
    # original. Reading the whole file is presumed, because `content` is the
    # text later passed to the Java parser -- confirm.
    content = f.read()
def flattern_json(d):
    """Flatten a nested dict breadth-first into a single-level dict.

    Nested dict keys are joined to their parent key with ``"_"``. Lists are
    split: their non-dict items are kept together as a list under the parent
    key, while their dict items are flattened as if they were stored directly
    under that key (so several dicts in one list may overwrite each other's
    entries).

    Args:
        d: Mapping with string keys, e.g. the result of ``json.loads``.

    Returns:
        A new flat dict; ``{}`` when ``d`` is empty.
    """
    from collections import deque

    if not d:
        return {}

    pending = deque()  # (flattened parent key, nested dict) pairs still to expand
    res = {}

    # Seed the queue with the top-level nested dicts; record scalars directly.
    for key, val in d.items():
        if isinstance(val, dict):
            pending.append((key, val))
        elif isinstance(val, list):
            plain_items = []
            for item in val:
                if isinstance(item, dict):
                    # Dict items are expanded later under the parent key.
                    pending.append((key, item))
                else:
                    plain_items.append(item)
            # At the top level a list without plain items leaves no entry.
            if plain_items:
                res[key] = plain_items
        else:
            res[key] = val

    while pending:
        parent, mapping = pending.popleft()
        for key, val in mapping.items():
            new_parent = parent + "_" + key
            if isinstance(val, list):
                plain_items = []
                for item in val:
                    if isinstance(item, dict):
                        pending.append((new_parent, item))
                    else:
                        plain_items.append(item)
                # The original guard here was ``len(temp) >= 0``, which is
                # always true, so nested lists are recorded even when empty.
                res[new_parent] = plain_items
            elif isinstance(val, dict):
                pending.append((new_parent, val))
            else:
                res[new_parent] = val
    return res
def flatten_json(y):
    """Flatten nested dicts and lists into a single-level dict.

    Keys along the path to each leaf are joined with ``"_"``; list elements
    contribute their position, e.g. ``{"a": [{"b": 1}]}`` becomes
    ``{"a_0_b": 1}``. Empty dicts and empty lists produce no entry.

    Args:
        y: A JSON-like value (dict, list or scalar).

    Returns:
        A new flat dict mapping joined key paths to leaf values. A scalar
        ``y`` is returned under the empty key ``""``.
    """
    out = {}

    def flatten(x, name=''):
        # `name` is the path so far and always ends with "_" (or is empty).
        if isinstance(x, dict):
            for key in x:
                flatten(x[key], f"{name}{key}_")
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, f"{name}{position}_")
        else:
            # Leaf value: drop the trailing "_" from the accumulated path.
            out[name[:-1]] = x

    flatten(y)
    return out
def json_ast_encoder(o):
    """Fallback serializer for ``json.dumps(..., default=json_ast_encoder)``.

    Args:
        o: An object the json module could not serialize on its own.

    Returns:
        * a list for sets (sorted when the items are orderable, so the output
          is deterministic); an empty set gives ``[]``,
        * the instance ``__dict__`` for objects that have one,
        * ``""`` for anything else.
    """
    if isinstance(o, (set, frozenset)):
        # Previously only empty sets were handled and non-empty sets fell
        # through to "", silently dropping their contents.
        try:
            return sorted(o)
        except TypeError:
            # Items of mixed, non-comparable types: keep them unsorted.
            return list(o)
    if hasattr(o, "__dict__"):
        return o.__dict__
    return ""
def find_end_line_number(node):
    """Return the largest line number found on ``node`` or any descendant.

    The result is the highest ``_position.line`` in the subtree, never less
    than ``node.position.line``. It is an approximation of the node's end
    line: lines that carry no positioned node of their own are not counted.

    Args:
        node: An object exposing ``position.line`` and ``children``; children
            may be nodes, (nested) lists of nodes, or arbitrary non-node
            values, which are ignored.

    Returns:
        The maximum line number seen.
    """
    max_line = node.position.line

    # Explicit stack instead of recursion, so deep trees cannot hit the
    # recursion limit.
    stack = [node]
    while stack:
        current = stack.pop()
        if isinstance(current, (list, tuple)):
            stack.extend(current)
            continue
        position = getattr(current, '_position', None)
        if position is not None and position.line > max_line:
            max_line = position.line
        # Non-node children (strings, sets, None, ...) have no `children`
        # attribute and are skipped instead of raising.
        stack.extend(getattr(current, 'children', None) or ())
    return max_line
# Read the same file again as a list of lines so that method source can be
# sliced out by line number.
with open(f_path, 'r') as f:
    lines = f.readlines()
import javalang as jl
tree = jl.parse.parse(content)
docs = []
failed = False
# Name of the most recent method whose end line could not be determined.
failed_method = None
index = 0
for path, node in tree.filter(jl.tree.MethodDeclaration):
    d = {}
    start_line = node.position.line
    # NOTE(review): the right-hand sides using the method name were truncated
    # in the original; `node.name` is presumed -- confirm.
    d['method'] = node.name
    d['index'] = index
    index += 1
    d['start_line'] = start_line
    try:
        end_line = find_end_line_number(node)
    except Exception:
        # Best effort: an end line of 0 marks the method for later repair by
        # get_new_end_line().
        failed_method = node.name
        d['end_line'] = 0
        failed = True
        print(f"Error in {node.name}")
    else:
        # Parser line numbers are 1-based while `lines` is 0-based; without
        # the -1 the first line of the method would be left out.
        d['code'] = lines[start_line - 1:end_line]
        d['end_line'] = end_line
    docs.append(d)
df = pd.DataFrame(docs)
def get_new_end_line(index, df, lines_count, end_line):
    """Return a usable end line for the method row identified by ``index``.

    An ``end_line`` of 0 marks a row whose end line is unknown. For such a
    row the end is taken to be the line just before the start of the next
    method, or the end of the file when no later method exists.

    Args:
        index: Value of the row's ``'index'`` column.
        df: DataFrame with ``'index'`` and ``'start_line'`` columns.
        lines_count: Total number of lines in the source file.
        end_line: The row's current end line (0 when unknown).

    Returns:
        ``end_line`` unchanged when it is non-zero, otherwise the estimate.
    """
    if end_line != 0:
        return end_line

    later = df[df['index'] > index]
    if later.empty:
        # Last method (or empty frame): it runs to the end of the file.
        return lines_count

    # Use the nearest following method rather than exactly index + 1, so a
    # gap in the numbering no longer raises IndexError.
    following = later[later['index'] == later['index'].min()]
    return following['start_line'].iloc[0] - 1
# Repair rows whose end line is unknown (0), then export the method table.
lines_count = len(lines)
df['new_end_line'] = df.apply(
    lambda row: get_new_end_line(row['index'], df, lines_count, row['end_line']),
    axis=1,
)
result = df.to_dict(orient="records")

# Round-trip the parsed tree through JSON to obtain plain dicts and lists,
# then take the body members of the first declared type.
v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
y = json.loads(v)['types'][0]['body']
z = json.dumps(y)

# One flattened record per body member.
docs = []
for i in y:
    m = flatten_json(i)
    # Without this append `docs` stayed empty, so the frame below had no
    # columns and the 'name' lookup failed.
    docs.append(m)
df = pd.DataFrame(docs)
df_1 = df.dropna(axis=1, how='all')  # drop columns that are entirely empty
df_2 = df_1.select_dtypes(include=['object'])  # keep object-typed columns only
df_3 = df_2.dropna(subset=['name'])  # keep rows that have a name
df_json = df.to_dict(orient="records")
for x in tree.filter(jl.tree.Member):
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment