Skip to content

Instantly share code, notes, and snippets.

@liyunrui
Created January 18, 2021 06:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liyunrui/361c74acc16ba47b1dd201c64df4e90a to your computer and use it in GitHub Desktop.
Save liyunrui/361c74acc16ba47b1dd201c64df4e90a to your computer and use it in GitHub Desktop.
leetcode-graph
class UnionFindSet:
def __init__(self, n):
#Initially, all elements are single element subsets.
self._parents = [node for node in range(n)]
# it's like height of tree but it's not always equal to height because path compression technique.
self._ranks = [1 for i in range(n)]
def find(self, u):
"""
The find method with path compression, return root of node u.
"""
while u != self._parents[u]:
# pass compression technique: make x and all parents of x pointed to root node of x so that we don’t have to traverse all intermediate nodes again.(flat tree)
self._parents[u] = self._parents[self._parents[u]]
u = self._parents[u]
return u
def union(self, u, v):
"""
The union method, with optimization union by rank. It returns False if a u and v are in the same tree/connected already, True if otherwise.
"""
root_u, root_v = self.find(u), self.find(v)
if root_u == root_v:
return False
if self._ranks[root_u] < self._ranks[root_v]:
self._parents[root_u] = root_v
elif self._ranks[root_u] > self._ranks[root_v]:
self._parents[root_v] = root_u
else:
# If ranks are same, then make one as root and increment its rank by one
self._parents[root_v] = root_u
self._ranks[root_u] += 1
return True
class Solution:
"""
Problem Clarification:
1.same name but might be different account
2.same email mean they're same account and must be same name
3.return email in sorted order
Thought Process:
Graph-DFS
1.draw edges between emails if they occur in the same account. Then, we come down to finding all nodes in same connected component because it represents they're same account.
2.Let's put in this way, think of email as a node. Name represent this connected component.
For example,
{NameA, 1, 2, 3} => NameA -- 1 -- 2 -- 3
{NameA, 2, 4, 5} => NameA -- 2 -- 4 -- 5 (The same graph node 2 appears)
{NameB, 6, 7, 8} => NameB -- 6 -- 7 -- 8
(Where numbers represent email addresses).
(Name A)
1-2-3
|
4-5 6-7-8 (NameB)
# build graph from first email to other emails
1-2-4 6-7
| | |
3 5 8
Step1: build the graph and create email_to_name hashmap
Step2: we iterate all possible nodes(e-mail), for each node, we do dfs search on graph. Think of edge as first email to other emails.
Step2-1
if node is not visted, every node in the same connected conected component is from same accout
Step2-2
do a dfs on graph to find a list to called component to store all nodes(email) in this graph
Step2-3
append name + sorted compnent into ans list which we'd like to return in the end.
Union-Find
Step1: build the graph and create email_to_name and email_to_id hashmap
Step2: do union for each email. edge is first email to other emails.
Step3: we iterate all nodes.
If root node is same, we append this email to answer
Note:
1. visited array is used in the case node represent with id 0 to n
2. visited set is used in the case node represnet with string
3. there's a cycle
Reference:
https://leetcode.com/problems/accounts-merge/discuss/109161/Python-Simple-DFS-with-explanation!!!
"""
def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
"""
DFS
T:O(a_i) + O(n) + O(b_i *logb_i) where n is number of unique emails and b_i is lengh of components[i] and a_i is length of accounts[i]
S:O(n) the space used by graph and email_to_name
"""
# step1
# g = collections.defaultdict(list)
g = collections.defaultdict(set) # the reason we use set is to avoid there's duplicate edge in our graph
email_to_name = {}
for acc in accounts:
name = acc[0]
for email in acc[1:]:
email_to_name[email] = name
# graph from first email to other emails
# g[email].append(acc[1])
# g[acc[1]].append(email)
g[email].add(acc[1])
g[acc[1]].add(email)
# since it's undirected graph
# step2
n = len(email_to_name)
#visited = [False for _ in range(n)] # used for node represents with id 0 to n
visited = set([])
accounts_merge = []
for node in email_to_name.keys():
if node not in visited:
components = []
self.dfs(node, components ,visited, g)
name = email_to_name[node]
accounts_merge.append([name]+sorted(components))
return accounts_merge
def dfs(self, cur, components ,visited, graph):
visited.add(cur)
components.append(cur)
for nei_node in graph[cur]:
if nei_node not in visited:
self.dfs(nei_node, components ,visited, graph)
def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
"""
Union-Find
"""
# step1:
email_to_name = {}
email_to_id = {}
i = 0
for acc in accounts:
name = acc[0]
for email in acc[1:]:
email_to_name[email] = name
if email not in email_to_id:
email_to_id[email] = i
i += 1
# step2: union nodes with the edge
n = len(email_to_name)
unionFind = UnionFindSet(n)
for acc in accounts:
name = acc[0]
for email in acc[1:]:
# there's edge betwee first email to others
unionFind.union(email_to_id[acc[1]], email_to_id[email])
# key as root nodes, values are list of all nodes who are in the same component
components = collections.defaultdict(list)
# step3: iterate all nodes
for email in email_to_name:
components[unionFind.find(email_to_id[email])].append(email)
return [[email_to_name[v[0]]] + sorted(v) for k, v in components.items()]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment