Created
January 18, 2021 06:34
-
-
Save liyunrui/361c74acc16ba47b1dd201c64df4e90a to your computer and use it in GitHub Desktop.
leetcode-graph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class UnionFindSet: | |
def __init__(self, n): | |
#Initially, all elements are single element subsets. | |
self._parents = [node for node in range(n)] | |
# it's like height of tree but it's not always equal to height because path compression technique. | |
self._ranks = [1 for i in range(n)] | |
def find(self, u): | |
""" | |
The find method with path compression, return root of node u. | |
""" | |
while u != self._parents[u]: | |
# pass compression technique: make x and all parents of x pointed to root node of x so that we don’t have to traverse all intermediate nodes again.(flat tree) | |
self._parents[u] = self._parents[self._parents[u]] | |
u = self._parents[u] | |
return u | |
def union(self, u, v): | |
""" | |
The union method, with optimization union by rank. It returns False if a u and v are in the same tree/connected already, True if otherwise. | |
""" | |
root_u, root_v = self.find(u), self.find(v) | |
if root_u == root_v: | |
return False | |
if self._ranks[root_u] < self._ranks[root_v]: | |
self._parents[root_u] = root_v | |
elif self._ranks[root_u] > self._ranks[root_v]: | |
self._parents[root_v] = root_u | |
else: | |
# If ranks are same, then make one as root and increment its rank by one | |
self._parents[root_v] = root_u | |
self._ranks[root_u] += 1 | |
return True | |
class Solution: | |
""" | |
Problem Clarification: | |
1.same name but might be different account | |
2.same email mean they're same account and must be same name | |
3.return email in sorted order | |
Thought Process: | |
Graph-DFS | |
1.draw edges between emails if they occur in the same account. Then, we come down to finding all nodes in same connected component because it represents they're same account. | |
2.Let's put in this way, think of email as a node. Name represent this connected component. | |
For example, | |
{NameA, 1, 2, 3} => NameA -- 1 -- 2 -- 3 | |
{NameA, 2, 4, 5} => NameA -- 2 -- 4 -- 5 (The same graph node 2 appears) | |
{NameB, 6, 7, 8} => NameB -- 6 -- 7 -- 8 | |
(Where numbers represent email addresses). | |
(Name A) | |
1-2-3 | |
| | |
4-5 6-7-8 (NameB) | |
# build graph from first email to other emails | |
1-2-4 6-7 | |
| | | | |
3 5 8 | |
Step1: build the graph and create email_to_name hashmap | |
Step2: we iterate all possible nodes(e-mail), for each node, we do dfs search on graph. Think of edge as first email to other emails. | |
Step2-1 | |
if node is not visted, every node in the same connected conected component is from same accout | |
Step2-2 | |
do a dfs on graph to find a list to called component to store all nodes(email) in this graph | |
Step2-3 | |
append name + sorted compnent into ans list which we'd like to return in the end. | |
Union-Find | |
Step1: build the graph and create email_to_name and email_to_id hashmap | |
Step2: do union for each email. edge is first email to other emails. | |
Step3: we iterate all nodes. | |
If root node is same, we append this email to answer | |
Note: | |
1. visited array is used in the case node represent with id 0 to n | |
2. visited set is used in the case node represnet with string | |
3. there's a cycle | |
Reference: | |
https://leetcode.com/problems/accounts-merge/discuss/109161/Python-Simple-DFS-with-explanation!!! | |
""" | |
def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]: | |
""" | |
DFS | |
T:O(a_i) + O(n) + O(b_i *logb_i) where n is number of unique emails and b_i is lengh of components[i] and a_i is length of accounts[i] | |
S:O(n) the space used by graph and email_to_name | |
""" | |
# step1 | |
# g = collections.defaultdict(list) | |
g = collections.defaultdict(set) # the reason we use set is to avoid there's duplicate edge in our graph | |
email_to_name = {} | |
for acc in accounts: | |
name = acc[0] | |
for email in acc[1:]: | |
email_to_name[email] = name | |
# graph from first email to other emails | |
# g[email].append(acc[1]) | |
# g[acc[1]].append(email) | |
g[email].add(acc[1]) | |
g[acc[1]].add(email) | |
# since it's undirected graph | |
# step2 | |
n = len(email_to_name) | |
#visited = [False for _ in range(n)] # used for node represents with id 0 to n | |
visited = set([]) | |
accounts_merge = [] | |
for node in email_to_name.keys(): | |
if node not in visited: | |
components = [] | |
self.dfs(node, components ,visited, g) | |
name = email_to_name[node] | |
accounts_merge.append([name]+sorted(components)) | |
return accounts_merge | |
def dfs(self, cur, components ,visited, graph): | |
visited.add(cur) | |
components.append(cur) | |
for nei_node in graph[cur]: | |
if nei_node not in visited: | |
self.dfs(nei_node, components ,visited, graph) | |
def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]: | |
""" | |
Union-Find | |
""" | |
# step1: | |
email_to_name = {} | |
email_to_id = {} | |
i = 0 | |
for acc in accounts: | |
name = acc[0] | |
for email in acc[1:]: | |
email_to_name[email] = name | |
if email not in email_to_id: | |
email_to_id[email] = i | |
i += 1 | |
# step2: union nodes with the edge | |
n = len(email_to_name) | |
unionFind = UnionFindSet(n) | |
for acc in accounts: | |
name = acc[0] | |
for email in acc[1:]: | |
# there's edge betwee first email to others | |
unionFind.union(email_to_id[acc[1]], email_to_id[email]) | |
# key as root nodes, values are list of all nodes who are in the same component | |
components = collections.defaultdict(list) | |
# step3: iterate all nodes | |
for email in email_to_name: | |
components[unionFind.find(email_to_id[email])].append(email) | |
return [[email_to_name[v[0]]] + sorted(v) for k, v in components.items()] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment