Ebrahim-Ramadan/ass.py

## ass.py
import csv
import time
from collections import defaultdict

# Read node data from nodes.csv


def read_nodes(file):
    """Load the paper-id -> title mapping from a nodes CSV file.

    The first row is treated as a header and skipped. For every other row
    the first column becomes the key and the second column the value.
    Rows are parsed with the ``csv`` module, so a quoted title may contain
    commas. Blank rows and rows with fewer than two columns are skipped
    instead of raising ``IndexError``.

    Args:
        file: Path of the CSV file; it is opened as UTF-8 text.

    Returns:
        dict mapping first-column values to second-column values, both
        with surrounding whitespace stripped. When a key occurs more than
        once, the last row wins.
    """
    nodes = {}
    # newline='' lets the csv module handle line endings itself.
    with open(file, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # Skip header (no error on an empty file)
        for row in reader:
            if len(row) < 2:
                # Blank or incomplete row: nothing usable to store.
                continue
            nodes[row[0].strip()] = row[1].strip()
    return nodes

# read edges data from edges.csv


def read_edges(file):
    """Load citation edges from an edges CSV file.

    The first row is treated as a header and skipped. For every other row
    the first two columns form one edge. Rows are parsed with the ``csv``
    module; blank rows and rows with fewer than two columns are skipped
    instead of raising ``IndexError``.

    Args:
        file: Path of the CSV file; it is opened as UTF-8 text.

    Returns:
        list of ``(first_column, second_column)`` tuples in file order,
        each value with surrounding whitespace stripped.
    """
    edges = []
    # newline='' lets the csv module handle line endings itself.
    with open(file, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # Skip header (no error on an empty file)
        for row in reader:
            if len(row) < 2:
                # Blank or incomplete row: cannot form an edge.
                continue
            edges.append((row[0].strip(), row[1].strip()))
    return edges

# retrieving highest cited papers


def highest_cited_papers(nodes, edges):
    """Rank the known papers by how often they appear as a citation target.

    Args:
        nodes: Mapping of paper id to title.
        edges: Iterable of indexable pairs; element ``[1]`` of each pair
            is the id of the paper being cited.

    Returns:
        List of ``(paper_id, title)`` tuples in descending order of
        citation count. Ids that are not present in ``nodes`` are left
        out. Papers with equal counts keep the order in which they were
        first cited.
    """
    tally = defaultdict(int)
    for link in edges:
        tally[link[1]] += 1

    # sorted() is stable, so ties stay in first-seen order.
    ranked_ids = sorted(tally, key=tally.get, reverse=True)
    return [(pid, nodes[pid]) for pid in ranked_ids if pid in nodes]

# Find the closest group of papers


def closest_group_of_papers(edges, threshold):
    """Group papers into connected clusters of the citation graph.

    Edges are treated as undirected: two papers land in the same group
    when a chain of citations, followed in either direction, links them.

    Args:
        edges: Iterable of ``(paper_id, citation_id)`` pairs.
        threshold: Minimum number of papers a group must contain to be
            returned.

    Returns:
        List of groups, each a list of paper ids in no particular order.
        A group consisting of a single paper is never returned, whatever
        the threshold.
    """
    adjacency = defaultdict(set)
    for source_id, target_id in edges:
        adjacency[source_id].add(target_id)
        adjacency[target_id].add(source_id)

    visited = set()
    components = []
    for seed in adjacency:
        if seed in visited:
            continue

        # Iterative depth-first walk collecting everything reachable from seed.
        members = set()
        pending = [seed]
        while pending:
            current = pending.pop()
            if current in visited:
                continue
            visited.add(current)
            members.add(current)
            pending.extend(adjacency[current] - visited)

        if len(members) > 1 and len(members) >= threshold:
            components.append(list(members))

    return components


# Script section: load both CSV files, report the most-cited papers and the
# connected paper groups, then print how long the whole run took.
# Input paths are relative to the current working directory.
nodes_file = 'nodes.csv'
edges_file = 'edges.csv'
# perf_counter() is a high-resolution clock intended for measuring short
# durations; time.time() follows the wall clock and can jump when the system
# clock is adjusted.
start_time = time.perf_counter()

nodes_data = read_nodes(nodes_file)
edges_data = read_edges(edges_file)

highest_cited = highest_cited_papers(nodes_data, edges_data)
print("Highest cited papers:")

# Only the first ten entries of the ranking are shown.
for paper_id, paper_title in highest_cited[:10]:
    print(f"Paper ID: {paper_id}, Title: {paper_title}")


# Passed to closest_group_of_papers as the minimum number of papers a group
# must contain to be reported.
threshold_value = 5  # You may adjust the threshold value
closest_group = closest_group_of_papers(edges_data, threshold_value)
print(
    f"\nclosest group of papers with at least {threshold_value} common citations:")
print(closest_group)

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"\nExecution time: {execution_time} s")
	import time
	from collections import defaultdict

	# Read node data from nodes.csv


	def read_nodes(file):
	    """Load the paper-id -> title mapping from a nodes CSV file.

	    The first row is treated as a header and skipped. For every other row
	    the first column becomes the key and the second column the value.
	    Rows are parsed with the ``csv`` module, so a quoted title may contain
	    commas. Blank rows and rows with fewer than two columns are skipped
	    instead of raising ``IndexError``.

	    Args:
	        file: Path of the CSV file; it is opened as UTF-8 text.

	    Returns:
	        dict mapping first-column values to second-column values, both
	        with surrounding whitespace stripped. When a key occurs more than
	        once, the last row wins.
	    """
	    nodes = {}
	    # newline='' lets the csv module handle line endings itself.
	    with open(file, 'r', encoding='utf-8', newline='') as f:
	        reader = csv.reader(f)
	        next(reader, None)  # Skip header (no error on an empty file)
	        for row in reader:
	            if len(row) < 2:
	                # Blank or incomplete row: nothing usable to store.
	                continue
	            nodes[row[0].strip()] = row[1].strip()
	    return nodes

	# read edges data from edges.csv


	def read_edges(file):
	    """Load citation edges from an edges CSV file.

	    The first row is treated as a header and skipped. For every other row
	    the first two columns form one edge. Rows are parsed with the ``csv``
	    module; blank rows and rows with fewer than two columns are skipped
	    instead of raising ``IndexError``.

	    Args:
	        file: Path of the CSV file; it is opened as UTF-8 text.

	    Returns:
	        list of ``(first_column, second_column)`` tuples in file order,
	        each value with surrounding whitespace stripped.
	    """
	    edges = []
	    # newline='' lets the csv module handle line endings itself.
	    with open(file, 'r', encoding='utf-8', newline='') as f:
	        reader = csv.reader(f)
	        next(reader, None)  # Skip header (no error on an empty file)
	        for row in reader:
	            if len(row) < 2:
	                # Blank or incomplete row: cannot form an edge.
	                continue
	            edges.append((row[0].strip(), row[1].strip()))
	    return edges

	# retrieving highest cited papers


	def highest_cited_papers(nodes, edges):
	    """Rank the known papers by how often they appear as a citation target.

	    Args:
	        nodes: Mapping of paper id to title.
	        edges: Iterable of indexable pairs; element ``[1]`` of each pair
	            is the id of the paper being cited.

	    Returns:
	        List of ``(paper_id, title)`` tuples in descending order of
	        citation count. Ids that are not present in ``nodes`` are left
	        out. Papers with equal counts keep the order in which they were
	        first cited.
	    """
	    citation_count = {}
	    for edge in edges:
	        citation_id = edge[1]
	        citation_count[citation_id] = citation_count.get(citation_id, 0) + 1

	    # sorted() is stable, so ties stay in first-seen order.
	    sorted_papers = sorted(citation_count.items(),
	                           key=lambda x: x[1], reverse=True)

	    return [(paper_id, nodes[paper_id])
	            for paper_id, _ in sorted_papers
	            if paper_id in nodes]

	# Find the closest group of papers


	def closest_group_of_papers(edges, threshold):
	    """Group papers into connected clusters of the citation graph.

	    Edges are treated as undirected: two papers land in the same group
	    when a chain of citations, followed in either direction, links them.

	    Args:
	        edges: Iterable of ``(paper_id, citation_id)`` pairs.
	        threshold: Minimum number of papers a group must contain to be
	            returned.

	    Returns:
	        List of groups, each a list of paper ids in no particular order.
	        A group consisting of a single paper is never returned, whatever
	        the threshold.
	    """
	    graph = defaultdict(set)
	    for edge in edges:
	        paper_id, citation_id = edge
	        graph[paper_id].add(citation_id)
	        graph[citation_id].add(paper_id)

	    closest_group = []
	    processed = set()

	    for start_node in graph:
	        if start_node not in processed:
	            # Iterative depth-first walk from start_node.
	            stack = [start_node]
	            group = set()
	            while stack:
	                node = stack.pop()
	                if node not in processed:
	                    processed.add(node)
	                    group.add(node)
	                    stack.extend(graph[node] - processed)
	            if len(group) > 1:
	                closest_group.append(list(group))

	    return [group for group in closest_group if len(group) >= threshold]



	# Script section: load both CSV files, report the most-cited papers and the
	# connected paper groups, then print how long the whole run took.
	# Input paths are relative to the current working directory.
	nodes_file = 'nodes.csv'
	edges_file = 'edges.csv'
	# perf_counter() is a high-resolution clock intended for measuring short
	# durations; time.time() follows the wall clock and can jump when the system
	# clock is adjusted.
	start_time = time.perf_counter()

	nodes_data = read_nodes(nodes_file)
	edges_data = read_edges(edges_file)

	highest_cited = highest_cited_papers(nodes_data, edges_data)
	print("Highest cited papers:")

	# Only the first ten entries of the ranking are shown.
	for paper_id, paper_title in highest_cited[:10]:
	    print(f"Paper ID: {paper_id}, Title: {paper_title}")


	# Passed to closest_group_of_papers as the minimum number of papers a group
	# must contain to be reported.
	threshold_value = 5 # You may adjust the threshold value
	closest_group = closest_group_of_papers(edges_data, threshold_value)
	print(
	    f"\nclosest group of papers with at least {threshold_value} common citations:")
	print(closest_group)

	end_time = time.perf_counter()
	execution_time = end_time - start_time
	print(f"\nExecution time: {execution_time} s")