dmitri-d/search_replace_docs.py

## search_replace_docs.py
from typing import Tuple, List
import os
import os.path
import fileinput
import re
import sys

class TrieNode:
    """A single node of a trie whose entries are sequences of string fragments."""

    def __init__(self, fragment: str) -> None:
        # The fragment this node stands for within a stored sequence.
        self.fragment = fragment
        # Child nodes, kept as a plain list in insertion order.
        self.children = []
        # True once some inserted sequence ends exactly at this node.
        self.word_finished = False
        # Number of insertions that passed through this node; the insertion
        # that creates the node counts as the first.
        self.counter = 1


def add(root, fragments: List[str]):
    """Insert the sequence ``fragments`` into the trie rooted at ``root``.

    Walks down from ``root`` one fragment at a time, reusing a child whose
    ``fragment`` equals the current one (and incrementing its ``counter``)
    or appending a new ``TrieNode`` when there is none. The node reached
    after the last fragment is flagged with ``word_finished = True``; for an
    empty ``fragments`` that node is ``root`` itself.
    """
    cursor = root
    for piece in fragments:
        existing = next(
            (kid for kid in cursor.children if kid.fragment == piece), None
        )
        if existing is None:
            # Nothing below the cursor carries this fragment: grow a branch.
            existing = TrieNode(piece)
            cursor.children.append(existing)
        else:
            # Another sequence shares this node; record the extra pass.
            existing.counter += 1
        cursor = existing
    # The walk ends on the node that terminates the inserted sequence.
    cursor.word_finished = True

def get_suffix(root, collected) -> Tuple[bool, List[str]]:
    """Collect the fragments of the unbranched chain that starts at ``root``.

    Each visited node's fragment is appended to ``collected`` (the caller's
    list is mutated in place). Returns ``(True, collected)`` once a node
    without children is reached, or ``(False, [])`` as soon as a node with
    more than one child is met; in that case fragments appended before the
    fork remain in ``collected``.
    """
    current = root
    while True:
        if len(current.children) > 1:
            # A fork means the continuation is ambiguous.
            return False, []

        collected.append(current.fragment)

        if not current.children:
            return True, collected

        # Exactly one child left: keep following the chain.
        current = current.children[0]


def find_match(root, prefix) -> Tuple[bool, List[str]]:
    """Resolve the fragment sequence ``prefix`` against the trie at ``root``.

    Fragments are consumed in order, descending to the child whose
    ``fragment`` equals the current one. When a fragment has no matching
    child and the current node has exactly one child, the unbranched chain
    below it (see ``get_suffix``) replaces the rest of ``prefix``.

    Returns ``(True, name)`` where ``name`` is the dotted string produced by
    ``to_ref`` from the collected fragments, or ``(False, [])`` when the trie
    is empty, the continuation is ambiguous, or the current node has no
    children at all.

    NOTE: on success the second element is a ``str``, not the ``List[str]``
    the annotation states; the annotation is kept unchanged for callers.
    """
    if not root.children:
        return False, []

    node = root
    matched_so_far = []
    for fragment in prefix:
        child = next((c for c in node.children if c.fragment == fragment), None)
        if child is not None:
            node = child
            matched_so_far.append(child.fragment)
            continue

        # Mismatch. Only a node with exactly one child offers an unambiguous
        # continuation; a leaf (zero children) has nothing to follow, and
        # indexing children[0] there used to raise IndexError.
        if len(node.children) != 1:
            return False, []

        found, suffix = get_suffix(node.children[0], [])
        if not found:
            return False, []
        matched_so_far.extend(suffix)
        return True, to_ref(matched_so_far)

    return True, to_ref(matched_so_far)

def to_ref(fragments):
    """Return ``fragments`` joined with dots, last fragment first."""
    back_to_front = reversed(fragments)
    return ".".join(back_to_front)

def load_refs_from_file(root, path):
    """Add the RST anchors found in ``path`` to the trie rooted at ``root``.

    A line counts as an anchor when it starts with ``.. _``. The first
    anchor in the file is skipped (presumably the file-level label; confirm
    against the generated docs). Every later anchor is split on ``.``,
    reversed, and handed to ``add``.
    """
    saw_file_ref = False
    # The context manager closes the file even if add() raises.
    with open(path, "r") as f:
        for line in f:
            if not line.startswith(".. _"):
                continue
            if not saw_file_ref:
                saw_file_ref = True
                continue

            # Drop the ".. _" lead-in, trailing whitespace/newline, and the
            # closing colon. Slicing a fixed two characters off the end lost
            # a real character when the last line had no newline and kept
            # the colon when the line had trailing spaces.
            label = line[4:].rstrip()
            if label.endswith(":"):
                label = label[:-1]
            add(root, list(reversed(label.split("."))))

def load_refs_from_dir(root, rst_dir="/home/wb/sandbox/envoy/generated/rst/api-v3/"):
    """Load anchors from every ``.rst`` file under ``rst_dir`` into ``root``.

    ``rst_dir`` defaults to the location the script was originally written
    against, so existing one-argument calls behave as before; pass another
    directory to run against a different checkout.
    """
    for dirpath, _dirnames, filenames in os.walk(rst_dir):
        for filename in filenames:
            if not filename.endswith(".rst"):
                continue
            path = os.path.join(dirpath, filename)
            print("loading from file: ", path)
            load_refs_from_file(root, path)

def load_proto_from_file(root, path):
    """Add the top-level messages declared in the proto file ``path`` to ``root``.

    Remembers the most recent ``package`` declaration and, for every later
    line that starts with ``message``, inserts ``package + message name``
    (split on ``.`` and reversed) into the trie via ``add``. Only lines that
    begin at column 0 are considered, so indented (nested) messages are not
    picked up. Lines that merely start with the keyword but do not parse as
    a declaration are ignored.
    """
    package = None
    # The context manager closes the file even if add() raises.
    with open(path, "r") as f:
        for line in f:
            if line.startswith("package"):
                # No end-of-line anchor, so a trailing comment after the
                # semicolon does not defeat the match.
                declared = re.search(r"^package\s+(.+?)\s*;", line)
                if declared:
                    package = declared.group(1).split(".")
            elif line.startswith("message") and package is not None:
                declared = re.search(r"^message\s+(.+?)\s*\{", line)
                if declared is None:
                    # e.g. "message_count = 1;" or a brace on the next line.
                    continue
                fully_qualified = package + declared.group(1).split(".")
                add(root, list(reversed(fully_qualified)))

def load_protos_from_dir(root, proto_dir="/home/wb/sandbox/envoy/api/envoy"):
    """Load messages from ``.proto`` files under ``proto_dir`` into ``root``.

    Only files whose joined path contains the substring ``v3`` are read.
    ``proto_dir`` defaults to the location the script was originally written
    against, so existing one-argument calls behave as before.
    """
    for dirpath, _dirnames, filenames in os.walk(proto_dir):
        for filename in filenames:
            if not filename.endswith(".proto"):
                continue
            path = os.path.join(dirpath, filename)
            if "v3" in path:
                print("loading from file: ", path)
                load_proto_from_file(root, path)

def replace_protos_in_file(root, whole_file):
    """Rewrite the type names of ``"@type": type.googleapis.com/...`` entries.

    For each occurrence, the text after the slash (up to the end of the
    line) is split on ``.``, reversed, and looked up with ``find_match``
    against ``root``. On a hit the name is replaced by the returned dotted
    name; otherwise the occurrence is left untouched.

    Only the matched occurrence is rewritten. The earlier
    ``str.replace``-based approach substituted the captured text everywhere
    in the file, which also altered longer names that merely contained it.

    Returns the updated text.
    """
    def _rewrite(found):
        type_name = found.group(1)
        ok, replacement = find_match(root, list(reversed(type_name.split("."))))
        if not ok:
            return found.group(0)
        # Group 1 runs to the end of the match, so everything before it is
        # the fixed '"@type": type.googleapis.com/' lead-in.
        lead_in = found.group(0)[: found.start(1) - found.start(0)]
        return lead_in + replacement

    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    return re.sub(r'"@type":\s+type\.googleapis\.com/(.+)', _rewrite, whole_file)

def replace_refs_in_file(root, whole_file):
    """Rewrite the targets of ``:ref:`text <target>``` roles in ``whole_file``.

    Each target is split on ``.``, reversed, and looked up with
    ``find_match`` against ``root``. On a hit the target is replaced by the
    returned dotted name; otherwise the role is left untouched.

    Only the target inside the matched role is rewritten. The earlier
    ``str.replace``-based approach substituted the captured text everywhere
    in the file, which also altered longer labels that merely contained it.

    Returns the updated text.
    """
    def _rewrite(found):
        target = found.group(1)
        ok, replacement = find_match(root, list(reversed(target.split("."))))
        if not ok:
            return found.group(0)
        whole = found.group(0)
        base = found.start(0)
        # Splice the new target in at the exact span of group 1.
        return (
            whole[: found.start(1) - base]
            + replacement
            + whole[found.end(1) - base:]
        )

    # DOTALL lets the link text span line breaks, as before.
    return re.sub(r":ref:`.+?<(.+?)>`", _rewrite, whole_file, flags=re.DOTALL)

def replace_in_dir(root, proto_root, dirpath):
    """Rewrite every ``.rst`` file under ``dirpath`` into a ``.new`` sibling.

    Each file is read whole, passed through ``replace_refs_in_file`` (using
    ``root``) and then ``replace_protos_in_file`` (using ``proto_root``), and
    the result is written next to the source as ``<name>.new``. The source
    file itself is not modified.
    """
    for current_dir, _subdirs, names in os.walk(dirpath):
        for name in names:
            if not name.endswith(".rst"):
                continue

            source_path = os.path.join(current_dir, name)
            print("replacing in file: ", source_path)

            with open(source_path, "r") as reader:
                original_text = reader.read()

            # Refs first, then proto type URLs, on the already-updated text.
            with_refs = replace_refs_in_file(root, original_text)
            rewritten = replace_protos_in_file(proto_root, with_refs)

            with open(source_path + ".new", "w") as writer:
                writer.write(rewritten)

if __name__ == "__main__":
    # Check the command line before walking the source trees, so a missing
    # argument is reported immediately instead of after both loads.
    if len(sys.argv) < 2:
        print("Usage: search_replace_docs.py path_to_directory")
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)

    # One trie for RST anchors, a separate one for proto message names.
    root = TrieNode('*')
    load_refs_from_dir(root)
    proto_root = TrieNode('*')
    load_protos_from_dir(proto_root)
    replace_in_dir(root, proto_root, sys.argv[1])
	from typing import Tuple, List
	import os
	import os.path
	import fileinput
	import re
	import sys

	class TrieNode:
	    """A single node of a trie whose entries are sequences of string fragments."""

	    def __init__(self, fragment: str) -> None:
	        # The fragment this node stands for within a stored sequence.
	        self.fragment = fragment
	        # Child nodes, kept as a plain list in insertion order.
	        self.children = []
	        # True once some inserted sequence ends exactly at this node.
	        self.word_finished = False
	        # Number of insertions that passed through this node; the insertion
	        # that creates the node counts as the first.
	        self.counter = 1


	def add(root, fragments: List[str]):
	    """Insert the sequence ``fragments`` into the trie rooted at ``root``.

	    Walks down from ``root`` one fragment at a time, reusing a child whose
	    ``fragment`` equals the current one (and incrementing its ``counter``)
	    or appending a new ``TrieNode`` when there is none. The node reached
	    after the last fragment is flagged with ``word_finished = True``; for an
	    empty ``fragments`` that node is ``root`` itself.
	    """
	    cursor = root
	    for piece in fragments:
	        existing = next(
	            (kid for kid in cursor.children if kid.fragment == piece), None
	        )
	        if existing is None:
	            # Nothing below the cursor carries this fragment: grow a branch.
	            existing = TrieNode(piece)
	            cursor.children.append(existing)
	        else:
	            # Another sequence shares this node; record the extra pass.
	            existing.counter += 1
	        cursor = existing
	    # The walk ends on the node that terminates the inserted sequence.
	    cursor.word_finished = True

	def get_suffix(root, collected) -> Tuple[bool, List[str]]:
	    """Collect the fragments of the unbranched chain that starts at ``root``.

	    Each visited node's fragment is appended to ``collected`` (the caller's
	    list is mutated in place). Returns ``(True, collected)`` once a node
	    without children is reached, or ``(False, [])`` as soon as a node with
	    more than one child is met; in that case fragments appended before the
	    fork remain in ``collected``.
	    """
	    current = root
	    while True:
	        if len(current.children) > 1:
	            # A fork means the continuation is ambiguous.
	            return False, []

	        collected.append(current.fragment)

	        if not current.children:
	            return True, collected

	        # Exactly one child left: keep following the chain.
	        current = current.children[0]


	def find_match(root, prefix) -> Tuple[bool, List[str]]:
	    """Resolve the fragment sequence ``prefix`` against the trie at ``root``.

	    Fragments are consumed in order, descending to the child whose
	    ``fragment`` equals the current one. When a fragment has no matching
	    child and the current node has exactly one child, the unbranched chain
	    below it (see ``get_suffix``) replaces the rest of ``prefix``.

	    Returns ``(True, name)`` where ``name`` is the dotted string produced by
	    ``to_ref`` from the collected fragments, or ``(False, [])`` when the trie
	    is empty, the continuation is ambiguous, or the current node has no
	    children at all.

	    NOTE: on success the second element is a ``str``, not the ``List[str]``
	    the annotation states; the annotation is kept unchanged for callers.
	    """
	    if not root.children:
	        return False, []

	    node = root
	    matched_so_far = []
	    for fragment in prefix:
	        child = next((c for c in node.children if c.fragment == fragment), None)
	        if child is not None:
	            node = child
	            matched_so_far.append(child.fragment)
	            continue

	        # Mismatch. Only a node with exactly one child offers an unambiguous
	        # continuation; a leaf (zero children) has nothing to follow, and
	        # indexing children[0] there would raise IndexError.
	        if len(node.children) != 1:
	            return False, []

	        found, suffix = get_suffix(node.children[0], [])
	        if not found:
	            return False, []
	        matched_so_far.extend(suffix)
	        return True, to_ref(matched_so_far)

	    return True, to_ref(matched_so_far)

	def to_ref(fragments):
	    """Return ``fragments`` joined with dots, last fragment first."""
	    return ".".join(reversed(fragments))

	def load_refs_from_file(root, path):
	    """Add the RST anchors found in ``path`` to the trie rooted at ``root``.

	    A line counts as an anchor when it starts with ``.. _``. The first
	    anchor in the file is skipped (presumably the file-level label; confirm
	    against the generated docs). Every later anchor is split on ``.``,
	    reversed, and handed to ``add``.
	    """
	    saw_file_ref = False
	    # The context manager closes the file even if add() raises.
	    with open(path, "r") as f:
	        for line in f:
	            if not line.startswith(".. _"):
	                continue
	            if not saw_file_ref:
	                saw_file_ref = True
	                continue

	            # Drop the ".. _" lead-in, trailing whitespace/newline, and the
	            # closing colon, rather than slicing a fixed two characters off
	            # the end (which mis-parses a final line without a newline).
	            label = line[4:].rstrip()
	            if label.endswith(":"):
	                label = label[:-1]
	            add(root, list(reversed(label.split("."))))

	def load_refs_from_dir(root, rst_dir="/home/wb/sandbox/envoy/generated/rst/api-v3/"):
	    """Load anchors from every ``.rst`` file under ``rst_dir`` into ``root``.

	    ``rst_dir`` defaults to the location the script was originally written
	    against, so one-argument calls keep working; pass another directory to
	    run against a different checkout.
	    """
	    for dirpath, _dirnames, filenames in os.walk(rst_dir):
	        for filename in filenames:
	            if not filename.endswith(".rst"):
	                continue
	            path = os.path.join(dirpath, filename)
	            print("loading from file: ", path)
	            load_refs_from_file(root, path)

	def load_proto_from_file(root, path):
	    """Add the top-level messages declared in the proto file ``path`` to ``root``.

	    Remembers the most recent ``package`` declaration and, for every later
	    line that starts with ``message``, inserts ``package + message name``
	    (split on ``.`` and reversed) into the trie via ``add``. Lines that
	    merely start with the keyword but do not parse as a declaration are
	    ignored.
	    """
	    package = None
	    # The context manager closes the file even if add() raises.
	    with open(path, "r") as f:
	        for line in f:
	            if line.startswith("package"):
	                # No end-of-line anchor, so a trailing comment after the
	                # semicolon does not defeat the match.
	                declared = re.search(r"^package\s+(.+?)\s*;", line)
	                if declared:
	                    package = declared.group(1).split(".")
	            elif line.startswith("message") and package is not None:
	                declared = re.search(r"^message\s+(.+?)\s*\{", line)
	                if declared is None:
	                    # e.g. "message_count = 1;" or a brace on the next line.
	                    continue
	                fully_qualified = package + declared.group(1).split(".")
	                add(root, list(reversed(fully_qualified)))

	def load_protos_from_dir(root, proto_dir="/home/wb/sandbox/envoy/api/envoy"):
	    """Load messages from ``.proto`` files under ``proto_dir`` into ``root``.

	    Only files whose joined path contains the substring ``v3`` are read.
	    ``proto_dir`` defaults to the location the script was originally written
	    against, so one-argument calls keep working.
	    """
	    for dirpath, _dirnames, filenames in os.walk(proto_dir):
	        for filename in filenames:
	            if not filename.endswith(".proto"):
	                continue
	            path = os.path.join(dirpath, filename)
	            if "v3" in path:
	                print("loading from file: ", path)
	                load_proto_from_file(root, path)

	def replace_protos_in_file(root, whole_file):
	    """Rewrite the type names of ``"@type": type.googleapis.com/...`` entries.

	    For each occurrence, the text after the slash (up to the end of the
	    line) is split on ``.``, reversed, and looked up with ``find_match``
	    against ``root``. On a hit the name is replaced by the returned dotted
	    name; otherwise the occurrence is left untouched.

	    Only the matched occurrence is rewritten, so longer names that merely
	    contain the same text elsewhere in the file are not altered.

	    Returns the updated text.
	    """
	    def _rewrite(found):
	        type_name = found.group(1)
	        ok, replacement = find_match(root, list(reversed(type_name.split("."))))
	        if not ok:
	            return found.group(0)
	        # Group 1 runs to the end of the match, so everything before it is
	        # the fixed '"@type": type.googleapis.com/' lead-in.
	        lead_in = found.group(0)[: found.start(1) - found.start(0)]
	        return lead_in + replacement

	    # Raw string: "\s" in a normal literal is an invalid escape sequence.
	    return re.sub(r'"@type":\s+type\.googleapis\.com/(.+)', _rewrite, whole_file)

	def replace_refs_in_file(root, whole_file):
	    """Rewrite the targets of ``:ref:`text <target>``` roles in ``whole_file``.

	    Each target is split on ``.``, reversed, and looked up with
	    ``find_match`` against ``root``. On a hit the target is replaced by the
	    returned dotted name; otherwise the role is left untouched.

	    Only the target inside the matched role is rewritten, so longer labels
	    that merely contain the same text elsewhere in the file are not altered.

	    Returns the updated text.
	    """
	    def _rewrite(found):
	        target = found.group(1)
	        ok, replacement = find_match(root, list(reversed(target.split("."))))
	        if not ok:
	            return found.group(0)
	        whole = found.group(0)
	        base = found.start(0)
	        # Splice the new target in at the exact span of group 1.
	        return (
	            whole[: found.start(1) - base]
	            + replacement
	            + whole[found.end(1) - base:]
	        )

	    # DOTALL lets the link text span line breaks.
	    return re.sub(r":ref:`.+?<(.+?)>`", _rewrite, whole_file, flags=re.DOTALL)

	def replace_in_dir(root, proto_root, dirpath):
	    """Rewrite every ``.rst`` file under ``dirpath`` into a ``.new`` sibling.

	    Each file is read whole, passed through ``replace_refs_in_file`` (using
	    ``root``) and then ``replace_protos_in_file`` (using ``proto_root``), and
	    the result is written next to the source as ``<name>.new``. The source
	    file itself is not modified.
	    """
	    for current_dir, _subdirs, names in os.walk(dirpath):
	        for name in names:
	            if not name.endswith(".rst"):
	                continue

	            source_path = os.path.join(current_dir, name)
	            print("replacing in file: ", source_path)

	            with open(source_path, "r") as reader:
	                original_text = reader.read()

	            # Refs first, then proto type URLs, on the already-updated text.
	            with_refs = replace_refs_in_file(root, original_text)
	            rewritten = replace_protos_in_file(proto_root, with_refs)

	            with open(source_path + ".new", "w") as writer:
	                writer.write(rewritten)

	if __name__ == "__main__":
	    # Check the command line before walking the source trees, so a missing
	    # argument is reported immediately instead of after both loads.
	    if len(sys.argv) < 2:
	        print("Usage: search_replace_docs.py path_to_directory")
	        # sys.exit is always available; the bare exit() helper is only
	        # installed by the site module.
	        sys.exit(1)

	    # One trie for RST anchors, a separate one for proto message names.
	    root = TrieNode('*')
	    load_refs_from_dir(root)
	    proto_root = TrieNode('*')
	    load_protos_from_dir(proto_root)
	    replace_in_dir(root, proto_root, sys.argv[1])