thangarajan8/javalang_parser_V1.py

## javalang_parser_V1.py

import pandas as pd
import numpy as np
import json
# Java source file analysed by this script.
f_path = "HomePage.java"
# Load the complete file as one string; it is passed to the parser later on.
with open(f_path, mode='r') as f:
    content = f.read()
def flattern_json(d):
    """Flatten a nested dict into one level, joining key paths with ``_``.

    The structure is walked breadth-first. For every key:

    * a dict value is expanded, its keys prefixed with ``<parent>_``;
    * a list value contributes its non-dict items as a list stored under
      the key, while each dict item is expanded under that same key (dict
      items of one list that share a key therefore overwrite one another);
    * any other value is stored unchanged.

    A list with no non-dict items (empty, or made only of dicts) adds no
    entry of its own. This rule is applied at every nesting level.

    Args:
        d: The mapping to flatten. Keys that end up inside a joined path
            must be strings because they are concatenated.

    Returns:
        A new flat dict; ``{}`` when ``d`` is empty or ``None``.
    """
    from collections import deque

    if not d:
        return {}

    res = {}
    # Sentinel prefix marking the top level, whose keys are kept as they are.
    root = object()
    # Each queue entry is (key prefix, dict still to be expanded).
    pending = deque([(root, d)])
    while pending:
        prefix, mapping = pending.popleft()
        for key, val in mapping.items():
            full_key = key if prefix is root else prefix + "_" + key
            if isinstance(val, dict):
                pending.append((full_key, val))
            elif isinstance(val, list):
                plain_items = []
                for item in val:
                    if isinstance(item, dict):
                        # Dict items are expanded later under the list's key.
                        pending.append((full_key, item))
                    else:
                        plain_items.append(item)
                # Only record the list when it holds at least one plain value.
                if plain_items:
                    res[full_key] = plain_items
            else:
                res[full_key] = val
    return res

def flatten_json(y):
    """Flatten nested dicts and lists into a single dict of leaf values.

    Each key is the path to a leaf joined with ``_``; list items contribute
    their zero-based position, e.g. ``{"a": {"b": [10, 20]}}`` becomes
    ``{"a_b_0": 10, "a_b_1": 20}``.

    Empty dicts and empty lists produce no entry. A top-level value that is
    neither a dict nor a list is stored under the empty-string key.

    Args:
        y: The value to flatten. Dict keys must be strings because they are
            concatenated into the path.

    Returns:
        A dict mapping path strings to leaf values, filled depth-first in
        the iteration order of the input.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance rather than an exact type match, so dict/list
        # subclasses (OrderedDict, defaultdict, ...) are expanded too
        # instead of being stored whole as a single leaf.
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, name + key + '_')
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, name + str(position) + '_')
        else:
            # Strip the trailing separator appended by the caller.
            out[name[:-1]] = x

    flatten(y)
    return out
def json_ast_encoder(o):
    """``default=`` hook for ``json.dumps`` for objects it cannot encode.

    * Sets and frozensets become lists, sorted when the members are
      mutually orderable (for stable output) and in iteration order
      otherwise.
    * Objects that have a ``__dict__`` are encoded as that attribute dict.
    * Anything else is encoded as the empty string.

    Args:
        o: The object ``json.dumps`` could not serialise on its own.

    Returns:
        A JSON-encodable stand-in for ``o``.
    """
    if isinstance(o, (set, frozenset)):
        # Only empty sets used to be handled; non-empty ones fell through
        # to the "" fallback and their members were lost.
        try:
            return sorted(o)
        except TypeError:
            # Members of mixed, non-comparable types: keep them unsorted.
            return list(o)
    if hasattr(o, "__dict__"):
        return o.__dict__
    return ""
def find_end_line_number(node):
    """Return the largest line recorded on ``node`` or any of its descendants.

    The search starts from ``node.position.line`` and visits every value
    reachable through ``children``, descending into lists, tuples and sets.
    Values without a ``_position`` (strings, ``None``, unpositioned nodes)
    do not affect the result, but an unpositioned node's own children are
    still visited.

    Args:
        node: AST node exposing ``position.line`` and ``children``.

    Returns:
        The maximum ``line`` found.
        NOTE(review): this is the last line carrying a positioned node,
        which presumably can be earlier than the line of the construct's
        closing brace; confirm against how callers use it.

    Raises:
        AttributeError: if ``node.position`` is missing, ``None`` or has no
            ``line`` attribute.
    """
    max_line = node.position.line

    # Explicit stack instead of recursion; visiting order is irrelevant
    # when only the maximum is wanted.
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, (list, tuple, set)):
            pending.extend(current)
            continue
        position = getattr(current, '_position', None)
        if position is not None and position.line > max_line:
            max_line = position.line
        # Non-node values (str, None, ...) have no children to follow.
        pending.extend(getattr(current, 'children', None) or ())
    return max_line
# Read the same file again, this time as a list of lines for slicing.
with open(f_path, mode='r') as f:
    lines = f.readlines()
import javalang as jl
tree = jl.parse.parse(content)

# Build one record per method declaration found in the parsed tree.
docs = []
failed = False
index = 0
for path, node in tree.filter(jl.tree.MethodDeclaration):
    d = {}
    if not failed:
        failed_method = None
    start_line = node.position.line
    try:
        d.update(method=node.name, index=index)
        index += 1
        d['start_line'] = start_line
        end_line = find_end_line_number(node)
        # NOTE(review): `lines` is a zero-indexed list, so this slice starts
        # at list index `start_line`; if the parser's line numbers are
        # 1-based the declaration line itself is left out. Confirm intent.
        d.update(code=lines[start_line:end_line], end_line=end_line)
    except Exception:
        # Best effort: remember the failure and mark the record with an
        # end line of 0 so it can be recognised afterwards.
        failed_method = node.name
        d['end_line'] = 0
        failed = True
        print(f"Error in {node.name}")
    docs.append(d)


df = pd.DataFrame(docs)

def get_new_end_line(index,df,lines_count,end_line):
    """Return ``end_line``, or a derived replacement when it is 0.

    Args:
        index: Value of the ``index`` column for the row being processed.
        df: Frame with ``index`` and ``start_line`` columns.
        lines_count: Value returned for the row holding the largest index.
        end_line: The row's current end line; 0 requests a replacement.

    Returns:
        ``end_line`` when it is not 0. Otherwise ``lines_count`` for the row
        with the largest ``index``, or one less than the ``start_line`` of
        the row whose ``index`` is ``index + 1``.

    Raises:
        IndexError: if a replacement is needed and no row has ``index + 1``.
    """
    # A non-zero end line is passed through untouched.
    if end_line != 0:
        return end_line

    last_index = df['index'].max()
    if index == last_index:
        return lines_count

    # End just before the first row registered under the following index.
    following_starts = df.loc[df['index'] == index + 1, 'start_line']
    return following_starts.iloc[0] - 1
# Number of lines read from the source file; passed to get_new_end_line.
lines_count = len(lines)

# For every row, keep the existing end_line or let get_new_end_line derive one
# when it is 0.
df['new_end_line'] = df.apply(lambda row: get_new_end_line(row['index'],df,lines_count,row['end_line']),axis=1)
# Per-method records as a list of dicts, one dict per row.
result = df.to_dict(orient="records")


# Serialise the whole tree to JSON text; json_ast_encoder handles the objects
# json cannot encode by itself.
v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
# Parse the text back and keep only the 'body' of the first entry in 'types'.
y = json.loads(v)['types'][0]['body']
# JSON text of that body.
z = json.dumps(y)
# `docs` is rebuilt here: one flattened dict per body entry.
docs = []
for i in y:
    m = flatten_json(i)
    docs.append(m)
# `df` is rebound to a new frame built from the flattened entries; it no longer
# refers to the per-method frame used above.
df = pd.DataFrame(docs)
# Drop columns in which every value is missing.
df_1 = df.dropna(axis=1, how='all')
# Keep only the object-dtype columns.
df_2 = df_1.select_dtypes(include=['object'])
# Drop rows that have no value in the 'name' column.
df_3 = df_2.dropna(subset=['name'])

# NOTE(review): built from `df`, not from the filtered df_1/df_2/df_3 frames.
df_json = df.to_dict(orient="records")
# Print every item yielded by the tree's filter for jl.tree.Member.
for x  in tree.filter(jl.tree.Member):
    print(x)

	import pandas as pd
	import numpy as np
	import json
	# Java source file analysed by this script.
	f_path = "HomePage.java"
	# Read the whole file into one string (the body of the `with` statement
	# has its indentation restored).
	with open(f_path, 'r') as f:
	    content = f.read()
	def flattern_json(d):
	    """Flatten a nested dict into one level, joining key paths with ``_``.

	    The structure is walked breadth-first. For every key:

	    * a dict value is expanded, its keys prefixed with ``<parent>_``;
	    * a list value contributes its non-dict items as a list stored under
	      the key, while each dict item is expanded under that same key (dict
	      items of one list that share a key therefore overwrite one another);
	    * any other value is stored unchanged.

	    A list with no non-dict items (empty, or made only of dicts) adds no
	    entry of its own. This rule is applied at every nesting level.

	    Args:
	        d: The mapping to flatten. Keys that end up inside a joined path
	            must be strings because they are concatenated.

	    Returns:
	        A new flat dict; ``{}`` when ``d`` is empty or ``None``.
	    """
	    from collections import deque

	    if not d:
	        return {}

	    res = {}
	    # Sentinel prefix marking the top level, whose keys are kept as they are.
	    root = object()
	    # Each queue entry is (key prefix, dict still to be expanded).
	    pending = deque([(root, d)])
	    while pending:
	        prefix, mapping = pending.popleft()
	        for key, val in mapping.items():
	            full_key = key if prefix is root else prefix + "_" + key
	            if isinstance(val, dict):
	                pending.append((full_key, val))
	            elif isinstance(val, list):
	                plain_items = []
	                for item in val:
	                    if isinstance(item, dict):
	                        # Dict items are expanded later under the list's key.
	                        pending.append((full_key, item))
	                    else:
	                        plain_items.append(item)
	                # Only record the list when it holds at least one plain value.
	                if plain_items:
	                    res[full_key] = plain_items
	            else:
	                res[full_key] = val
	    return res

	def flatten_json(y):
	    """Flatten nested dicts and lists into a single dict of leaf values.

	    Each key is the path to a leaf joined with ``_``; list items contribute
	    their zero-based position, e.g. ``{"a": {"b": [10, 20]}}`` becomes
	    ``{"a_b_0": 10, "a_b_1": 20}``.

	    Empty dicts and empty lists produce no entry. A top-level value that is
	    neither a dict nor a list is stored under the empty-string key.

	    Args:
	        y: The value to flatten. Dict keys must be strings because they are
	            concatenated into the path.

	    Returns:
	        A dict mapping path strings to leaf values, filled depth-first in
	        the iteration order of the input.
	    """
	    out = {}

	    def flatten(x, name=''):
	        # isinstance rather than an exact type match, so dict/list
	        # subclasses (OrderedDict, defaultdict, ...) are expanded too
	        # instead of being stored whole as a single leaf.
	        if isinstance(x, dict):
	            for key, value in x.items():
	                flatten(value, name + key + '_')
	        elif isinstance(x, list):
	            for position, item in enumerate(x):
	                flatten(item, name + str(position) + '_')
	        else:
	            # Strip the trailing separator appended by the caller.
	            out[name[:-1]] = x

	    flatten(y)
	    return out
	def json_ast_encoder(o):
	    """``default=`` hook for ``json.dumps`` for objects it cannot encode.

	    * Sets and frozensets become lists, sorted when the members are
	      mutually orderable (for stable output) and in iteration order
	      otherwise.
	    * Objects that have a ``__dict__`` are encoded as that attribute dict.
	    * Anything else is encoded as the empty string.

	    Args:
	        o: The object ``json.dumps`` could not serialise on its own.

	    Returns:
	        A JSON-encodable stand-in for ``o``.
	    """
	    if isinstance(o, (set, frozenset)):
	        # Only empty sets used to be handled; non-empty ones fell through
	        # to the "" fallback and their members were lost.
	        try:
	            return sorted(o)
	        except TypeError:
	            # Members of mixed, non-comparable types: keep them unsorted.
	            return list(o)
	    if hasattr(o, "__dict__"):
	        return o.__dict__
	    return ""
	def find_end_line_number(node):
	    """Return the largest line recorded on ``node`` or any of its descendants.

	    The search starts from ``node.position.line`` and visits every value
	    reachable through ``children``, descending into lists, tuples and sets.
	    Values without a ``_position`` (strings, ``None``, unpositioned nodes)
	    do not affect the result, but an unpositioned node's own children are
	    still visited.

	    Args:
	        node: AST node exposing ``position.line`` and ``children``.

	    Returns:
	        The maximum ``line`` found.
	        NOTE(review): this is the last line carrying a positioned node,
	        which presumably can be earlier than the line of the construct's
	        closing brace; confirm against how callers use it.

	    Raises:
	        AttributeError: if ``node.position`` is missing, ``None`` or has no
	            ``line`` attribute.
	    """
	    max_line = node.position.line

	    # Explicit stack instead of recursion; visiting order is irrelevant
	    # when only the maximum is wanted.
	    pending = [node]
	    while pending:
	        current = pending.pop()
	        if isinstance(current, (list, tuple, set)):
	            pending.extend(current)
	            continue
	        position = getattr(current, '_position', None)
	        if position is not None and position.line > max_line:
	            max_line = position.line
	        # Non-node values (str, None, ...) have no children to follow.
	        pending.extend(getattr(current, 'children', None) or ())
	    return max_line
	# Read the same file again, this time as a list of lines for slicing.
	# (Block structure restored: every line had been flattened to one level.)
	with open(f_path, 'r') as f:
	    lines = f.readlines()
	import javalang as jl
	tree = jl.parse.parse(content)

	# Build one record per method declaration found in the parsed tree.
	docs = []
	failed = False
	index = 0
	for path, node in tree.filter(jl.tree.MethodDeclaration):
	    d = {}
	    if not failed:
	        failed_method = None
	    start_line = node.position.line
	    try:
	        d['method'] = node.name
	        d['index'] = index
	        index += 1
	        d['start_line'] = start_line
	        end_line = find_end_line_number(node)
	        # NOTE(review): `lines` is a zero-indexed list, so this slice starts
	        # at list index `start_line`; if the parser's line numbers are
	        # 1-based the declaration line itself is left out. Confirm intent.
	        d['code'] = lines[start_line:end_line]
	        d['end_line'] = end_line
	    except Exception:
	        # Best effort: remember the failure and mark the record with an
	        # end line of 0 so it can be recognised afterwards.
	        failed_method = node.name
	        d['end_line'] = 0
	        failed = True
	        print(f"Error in {node.name}")
	    docs.append(d)



	df = pd.DataFrame(docs)

	def get_new_end_line(index,df,lines_count,end_line):
	    """Return ``end_line``, or a derived replacement when it is 0.

	    Args:
	        index: Value of the ``index`` column for the row being processed.
	        df: Frame with ``index`` and ``start_line`` columns.
	        lines_count: Value returned for the row holding the largest index.
	        end_line: The row's current end line; 0 requests a replacement.

	    Returns:
	        ``end_line`` when it is not 0. Otherwise ``lines_count`` for the row
	        with the largest ``index``, or one less than the ``start_line`` of
	        the row whose ``index`` is ``index + 1``.

	    Raises:
	        IndexError: if a replacement is needed and no row has ``index + 1``.
	    """
	    # A non-zero end line is passed through untouched.
	    if end_line != 0:
	        return end_line

	    last_index = df['index'].max()
	    if index == last_index:
	        return lines_count

	    # End just before the first row registered under the following index.
	    following_starts = df.loc[df['index'] == index + 1, 'start_line']
	    return following_starts.iloc[0] - 1
	# Number of lines read from the source file; passed to get_new_end_line.
	lines_count = len(lines)

	# For every row, keep the existing end_line or let get_new_end_line derive
	# one when it is 0.
	df['new_end_line'] = df.apply(lambda row: get_new_end_line(row['index'],df,lines_count,row['end_line']),axis=1)
	# Per-method records as a list of dicts, one dict per row.
	result = df.to_dict(orient="records")


	# Serialise the whole tree to JSON text; json_ast_encoder handles the
	# objects json cannot encode by itself.
	v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
	# Parse the text back and keep only the 'body' of the first entry in 'types'.
	y = json.loads(v)['types'][0]['body']
	# JSON text of that body.
	z = json.dumps(y)
	# `docs` is rebuilt here: one flattened dict per body entry (loop body
	# indentation restored).
	docs = []
	for i in y:
	    m = flatten_json(i)
	    docs.append(m)
	# `df` is rebound to a new frame built from the flattened entries.
	df = pd.DataFrame(docs)
	# Drop columns in which every value is missing.
	df_1 = df.dropna(axis=1, how='all')
	# Keep only the object-dtype columns.
	df_2 = df_1.select_dtypes(include=['object'])
	# Drop rows that have no value in the 'name' column.
	df_3 = df_2.dropna(subset=['name'])

	# NOTE(review): built from `df`, not from the filtered df_1/df_2/df_3 frames.
	df_json = df.to_dict(orient="records")
	# Print every item yielded by the tree's filter for jl.tree.Member.
	for x in tree.filter(jl.tree.Member):
	    print(x)