liyunrui/721. Accounts Merge.py

## 721. Accounts Merge.py
class UnionFindSet:
    def __init__(self, n):
        #Initially, all elements are single element subsets.
        self._parents = [node for node in range(n)]
        # it's like height of tree but it's not always equal to height because path compression technique.
        self._ranks = [1 for i in range(n)]

    def find(self, u):
        """
        The find method with path compression, return root of node u.
        """
        while u != self._parents[u]:
            # pass compression technique: make x and all parents of x pointed to root node of x so that we don’t have to traverse all intermediate nodes again.(flat tree)
            self._parents[u] = self._parents[self._parents[u]]
            u = self._parents[u]
        return u

    def union(self, u, v):
        """
        The union method, with optimization union by rank. It returns False if a u and v are in the same tree/connected already, True if otherwise.
        """
        root_u, root_v = self.find(u), self.find(v)
        if root_u == root_v:
            return False

        if self._ranks[root_u] < self._ranks[root_v]:
            self._parents[root_u] = root_v
        elif self._ranks[root_u] > self._ranks[root_v]:
            self._parents[root_v] = root_u
        else:
            # If ranks are same, then make one as root and increment its rank by one
            self._parents[root_v] = root_u
            self._ranks[root_u] += 1
        return True

class Solution:
    """
    Problem Clarification:
        1.same name but might be different account
        2.same email mean they're same account and must be same name
        3.return email in sorted order
    Thought Process:
        Graph-DFS
        1.draw edges between emails if they occur in the same account. Then, we come down to finding all nodes in same connected component because it represents they're same account.
        2.Let's put in this way, think of email as a node. Name represent this connected component.

    For example,
    {NameA, 1, 2, 3} => NameA -- 1 -- 2 -- 3
    {NameA, 2, 4, 5} => NameA -- 2 -- 4 -- 5 (The same graph node 2 appears)
    {NameB, 6, 7, 8} => NameB -- 6 -- 7 -- 8
    (Where numbers represent email addresses).

    (Name A)
    1-2-3
      |
      4-5   6-7-8 (NameB)

    # build graph from first email to other emails
    1-2-4   6-7
    | |     |
    3 5     8

        Step1: build the graph and create email_to_name hashmap

        Step2: we iterate all possible nodes(e-mail), for each node, we do dfs search on graph. Think of edge as first email to other emails.
            Step2-1
                if node is not visted, every node in the same connected conected component is from same accout
            Step2-2
                do a dfs on graph to find a list to called component to store all nodes(email) in this graph

            Step2-3
                append name + sorted compnent into ans list which we'd like to return in the end.

        Union-Find
            Step1: build the graph and create email_to_name and email_to_id hashmap
            Step2: do union for each email. edge is first email to other emails.
            Step3: we iterate all nodes.
                If root node is same, we append this email to answer
    Note:
        1. visited array is used in the case node represent with id 0 to n
        2. visited set is used in the case node represnet with string
        3. there's a cycle
    Reference:
        https://leetcode.com/problems/accounts-merge/discuss/109161/Python-Simple-DFS-with-explanation!!!
    """
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        """
        DFS
        T:O(a_i) + O(n) + O(b_i *logb_i) where n is number of unique emails and b_i is lengh of components[i] and a_i is length of accounts[i]
        S:O(n) the space used by graph and email_to_name
        """
        # step1
        # g = collections.defaultdict(list)
        g = collections.defaultdict(set) # the reason we use set is to avoid there's duplicate edge in our graph
        email_to_name = {}
        for acc in accounts:
            name = acc[0]
            for email in acc[1:]:
                email_to_name[email] = name
                # graph from first email to other emails
                # g[email].append(acc[1])
                # g[acc[1]].append(email)
                g[email].add(acc[1])
                g[acc[1]].add(email)
                # since it's undirected graph
        # step2
        n = len(email_to_name)
        #visited = [False for _ in range(n)] # used for node represents with id 0 to n
        visited = set([])
        accounts_merge = []
        for node in email_to_name.keys():
            if node not in visited:
                components = []
                self.dfs(node, components ,visited, g)
                name = email_to_name[node]
                accounts_merge.append([name]+sorted(components))
        return accounts_merge

    def dfs(self, cur, components ,visited, graph):
        visited.add(cur)
        components.append(cur)
        for nei_node in graph[cur]:
            if nei_node not in visited:
                self.dfs(nei_node, components ,visited, graph)

    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        """
        Union-Find
        """
        # step1:
        email_to_name = {}
        email_to_id = {}
        i = 0
        for acc in accounts:
            name = acc[0]
            for email in acc[1:]:
                email_to_name[email] = name
                if email not in email_to_id:
                    email_to_id[email] = i
                    i += 1
        # step2: union nodes with the edge
        n = len(email_to_name)
        unionFind = UnionFindSet(n)

        for acc in accounts:
            name = acc[0]
            for email in acc[1:]:
                # there's edge betwee first email to others
                unionFind.union(email_to_id[acc[1]], email_to_id[email])


        # key as root nodes, values are list of all nodes who are in the same component
        components = collections.defaultdict(list)
        # step3: iterate all nodes
        for email in email_to_name:
            components[unionFind.find(email_to_id[email])].append(email)
        return [[email_to_name[v[0]]] + sorted(v) for k, v in components.items()]
	class UnionFindSet:
	def __init__(self, n):
	#Initially, all elements are single element subsets.
	self._parents = [node for node in range(n)]
	# it's like height of tree but it's not always equal to height because path compression technique.
	self._ranks = [1 for i in range(n)]

	def find(self, u):
	"""
	The find method with path compression, return root of node u.
	"""
	while u != self._parents[u]:
	# pass compression technique: make x and all parents of x pointed to root node of x so that we don’t have to traverse all intermediate nodes again.(flat tree)
	self._parents[u] = self._parents[self._parents[u]]
	u = self._parents[u]
	return u

	def union(self, u, v):
	"""
	The union method, with optimization union by rank. It returns False if a u and v are in the same tree/connected already, True if otherwise.
	"""
	root_u, root_v = self.find(u), self.find(v)
	if root_u == root_v:
	return False

	if self._ranks[root_u] < self._ranks[root_v]:
	self._parents[root_u] = root_v
	elif self._ranks[root_u] > self._ranks[root_v]:
	self._parents[root_v] = root_u
	else:
	# If ranks are same, then make one as root and increment its rank by one
	self._parents[root_v] = root_u
	self._ranks[root_u] += 1
	return True

	class Solution:
	"""
	Problem Clarification:
	1.same name but might be different account
	2.same email mean they're same account and must be same name
	3.return email in sorted order
	Thought Process:
	Graph-DFS
	1.draw edges between emails if they occur in the same account. Then, we come down to finding all nodes in same connected component because it represents they're same account.
	2.Let's put in this way, think of email as a node. Name represent this connected component.

	For example,
	{NameA, 1, 2, 3} => NameA -- 1 -- 2 -- 3
	{NameA, 2, 4, 5} => NameA -- 2 -- 4 -- 5 (The same graph node 2 appears)
	{NameB, 6, 7, 8} => NameB -- 6 -- 7 -- 8
	(Where numbers represent email addresses).

	(Name A)
	1-2-3
	\|
	4-5 6-7-8 (NameB)

	# build graph from first email to other emails
	1-2-4 6-7
	\| \| \|
	3 5 8

	Step1: build the graph and create email_to_name hashmap

	Step2: we iterate all possible nodes(e-mail), for each node, we do dfs search on graph. Think of edge as first email to other emails.
	Step2-1
	if node is not visted, every node in the same connected conected component is from same accout
	Step2-2
	do a dfs on graph to find a list to called component to store all nodes(email) in this graph

	Step2-3
	append name + sorted compnent into ans list which we'd like to return in the end.

	Union-Find
	Step1: build the graph and create email_to_name and email_to_id hashmap
	Step2: do union for each email. edge is first email to other emails.
	Step3: we iterate all nodes.
	If root node is same, we append this email to answer
	Note:
	1. visited array is used in the case node represent with id 0 to n
	2. visited set is used in the case node represnet with string
	3. there's a cycle
	Reference:
	https://leetcode.com/problems/accounts-merge/discuss/109161/Python-Simple-DFS-with-explanation!!!
	"""
	def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
	"""
	DFS
	T:O(a_i) + O(n) + O(b_i *logb_i) where n is number of unique emails and b_i is lengh of components[i] and a_i is length of accounts[i]
	S:O(n) the space used by graph and email_to_name
	"""
	# step1
	# g = collections.defaultdict(list)
	g = collections.defaultdict(set) # the reason we use set is to avoid there's duplicate edge in our graph
	email_to_name = {}
	for acc in accounts:
	name = acc[0]
	for email in acc[1:]:
	email_to_name[email] = name
	# graph from first email to other emails
	# g[email].append(acc[1])
	# g[acc[1]].append(email)
	g[email].add(acc[1])
	g[acc[1]].add(email)
	# since it's undirected graph
	# step2
	n = len(email_to_name)
	#visited = [False for _ in range(n)] # used for node represents with id 0 to n
	visited = set([])
	accounts_merge = []
	for node in email_to_name.keys():
	if node not in visited:
	components = []
	self.dfs(node, components ,visited, g)
	name = email_to_name[node]
	accounts_merge.append([name]+sorted(components))
	return accounts_merge

	def dfs(self, cur, components ,visited, graph):
	visited.add(cur)
	components.append(cur)
	for nei_node in graph[cur]:
	if nei_node not in visited:
	self.dfs(nei_node, components ,visited, graph)

	def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
	"""
	Union-Find
	"""
	# step1:
	email_to_name = {}
	email_to_id = {}
	i = 0
	for acc in accounts:
	name = acc[0]
	for email in acc[1:]:
	email_to_name[email] = name
	if email not in email_to_id:
	email_to_id[email] = i
	i += 1
	# step2: union nodes with the edge
	n = len(email_to_name)
	unionFind = UnionFindSet(n)

	for acc in accounts:
	name = acc[0]
	for email in acc[1:]:
	# there's edge betwee first email to others
	unionFind.union(email_to_id[acc[1]], email_to_id[email])


	# key as root nodes, values are list of all nodes who are in the same component
	components = collections.defaultdict(list)
	# step3: iterate all nodes
	for email in email_to_name:
	components[unionFind.find(email_to_id[email])].append(email)
	return [[email_to_name[v[0]]] + sorted(v) for k, v in components.items()]