# Zemke/beautiful-soup.py

# beautiful-soup.py
import os
import re
import sys
from typing import IO

from bs4 import BeautifulSoup
from bs4.element import Tag

# Path to process: the sole command-line argument when exactly one is given,
# otherwise the current working directory.
argv_path = sys.argv[1] if len(sys.argv) == 2 else os.getcwd()


class Replacement:
    """A form control paired with the ``<label>`` assumed to describe it.

    Attributes:
        input: The tag that should carry an ``id``.
        label: The ``<label>`` tag that should point at ``input`` via ``for``.
        id: The control's existing ``id`` when it has a non-empty one,
            otherwise its ``name``.
    """

    input: Tag = None
    label: Tag = None

    def __init__(self, input_tag: Tag, label_tag: Tag):
        """Store both tags and derive the id shared by ``id`` and ``for``."""
        self.input = input_tag
        self.label = label_tag
        self.id = input_tag.get('id') or input_tag.get('name')

    @staticmethod
    def find_matching_label(input_tag: Tag, tags=None):
        """Return the ``<label>`` listed directly before ``input_tag``.

        Args:
            input_tag: The tag to find a label for.
            tags: Sequence of tags to search. Defaults to
                ``Main.all_label_and_input_tags``.

        Returns:
            The element preceding ``input_tag`` when that element is a
            ``<label>``; ``None`` when ``input_tag`` is first in the
            sequence, is not in the sequence, or is preceded by a tag that
            is not a label.
        """
        if tags is None:
            tags = Main.all_label_and_input_tags

        # Locate by identity. ``list.index`` compares with ``==`` and could
        # therefore return the position of an earlier tag that merely
        # compares equal to this one.
        position = next(
            (index for index, tag in enumerate(tags) if tag is input_tag),
            None)

        # Position 0 has no predecessor; subtracting 1 and indexing would
        # wrap around to the last element of the sequence.
        if position is None or position == 0:
            return None

        assumed_label = tags[position - 1]
        if assumed_label.name != 'label':
            return None
        return assumed_label

    @staticmethod
    def has_ng_model_and_name(tag: Tag) -> bool:
        """Return whether ``tag`` has both an ``ng-model`` and a ``name`` attribute."""
        return tag.has_attr('ng-model') and tag.has_attr('name')

    def __str__(self) -> str:
        """Return ``<tag>[ng-model=..., name=..., label=...]`` for debugging.

        Uses the first child of the label as its text; an empty label yields
        an empty ``label=`` field instead of raising.
        """
        label_text = self.label.contents[0] if self.label.contents else ''
        # Formatting (rather than ``+``) tolerates a missing attribute or a
        # non-string first child.
        return "{}[ng-model={}, name={}, label={}]".format(
            self.input.name,
            self.input.get('ng-model'),
            self.input.get('name'),
            label_text)


class Main:
    """Insert missing ``id`` / ``for`` attributes into one HTML file.

    Constructing an instance does the whole job: label/control pairs are
    collected from the parsed document, the raw file text is patched with
    regular expressions, and the file is replaced if the text changed.
    """

    # Every <label> plus every tag that has both ng-model and name, as
    # returned by the soup of the most recently processed file. Kept at
    # class level because Replacement.find_matching_label reads it.
    all_label_and_input_tags = []

    def __init__(self, file: IO, soup: BeautifulSoup):
        """Patch the file behind ``file`` using its parsed form ``soup``.

        Args:
            file: Open text file; its remaining content is read and its
                ``name`` is the path that gets replaced.
            soup: Parsed document used to find labels and controls.
        """
        original_content = file.read()
        content = original_content
        Main.all_label_and_input_tags = soup(
            ['label', Replacement.has_ng_model_and_name])

        replacements = []
        for tag in Main.all_label_and_input_tags:
            if tag.name == 'label':
                continue
            matching_label = Replacement.find_matching_label(tag)
            if matching_label is not None:
                replacements.append(Replacement(tag, matching_label))

        for replacement in replacements:
            # Tags whose name starts with "pkp" are left untouched.
            if replacement.input.name.startswith('pkp'):
                continue
            # So are controls whose direct parent is a <label>.
            if replacement.input.parent.name == 'label':
                continue

            if not replacement.input.has_attr('id'):
                content = Main.add_id_to_input_tag(content, replacement)
            if not replacement.label.has_attr('for'):
                content = Main.add_for_to_label_tag(content, replacement)

        # Rewrite only when a substitution actually altered the text. The new
        # text goes to "<name>.out" first and then replaces the original.
        if content != original_content:
            out_path = file.name + '.out'
            with open(out_path, 'w') as out_file:
                out_file.write(content)
            os.replace(out_path, file.name)

    @staticmethod
    def add_id_to_input_tag(content: str, replacement: Replacement) -> str:
        """Return ``content`` with ``id="<replacement.id>"`` added to the control.

        Every opening tag in ``content`` that has the control's tag name and
        double-quoted ``name`` / ``ng-model`` attributes equal to the
        control's is rewritten. The id is appended after the existing
        attributes and a self-closing ``/`` before ``>`` is dropped. Matching
        stops at the first ``>``, so tags with ``>`` inside an attribute
        value are not handled.
        """
        tag_name = re.escape(replacement.input.name)
        name_value = re.escape(replacement.input.get('name'))
        model_value = re.escape(replacement.input.get('ng-model'))
        # Raw strings throughout: the pattern previously relied on "\/" in a
        # normal string literal, which is an invalid escape sequence.
        pattern = (
            r'(<' + tag_name
            + r'[^>]*?(?=[^>]*name="' + name_value + r'")'
            + r'(?=[^>]*ng-model="' + model_value + r'")'
            + r'[^>]*?)/?>'
        )
        # A callable replacement inserts the id literally; in a template
        # string any backslash in the id would be treated as an escape.
        return re.sub(
            pattern,
            lambda match: match.group(1) + ' id="' + replacement.id + '">',
            content)

    @staticmethod
    def add_for_to_label_tag(content: str, replacement: Replacement) -> str:
        """Return ``content`` with ``for="<replacement.id>"`` added to the label.

        The label is located by its content: each child of the parsed label
        is rendered to text, ``>`` is treated as a separator, and the result
        is split on whitespace. Every ``<label>`` in ``content`` that has no
        ``for`` attribute yet and contains all of those fragments before its
        closing tag gets the attribute inserted right after ``<label``.
        ``content`` is returned unchanged when the label yields no fragments,
        because it cannot be told apart from any other label.
        """
        fragments = " ".join(
            str(child).replace('>', ' ')
            for child in replacement.label.contents
        ).split()
        if not fragments:
            return content

        # Skip labels that already carry a for attribute so none ends up
        # with two of them.
        pattern = r'<label(?![^>]*\sfor\s*=)([^>]*?>'
        for fragment in fragments:
            # Each fragment must occur somewhere before the next </label>.
            pattern += r'(?=(?:(?!</label>).)*{})'.format(re.escape(fragment))
        pattern += r'.*?</label>)'

        return re.sub(
            re.compile(pattern, re.DOTALL),
            lambda match: '<label for="' + replacement.id + '"' + match.group(1),
            content)


def recurse(path):
    """Process every ``.html`` file found under the directory ``path``.

    Sub-directories are descended into as they are encountered. Nothing
    happens when ``path`` is not a directory.
    """
    if not os.path.isdir(path):
        return

    for entry_name in os.listdir(path):
        entry_path = os.path.join(path, entry_name)

        if os.path.isdir(entry_path):
            recurse(entry_path)
            continue

        _, extension = os.path.splitext(entry_name)
        if extension == '.html':
            process_html_file(entry_path)


def process_html_file(file_path):
    """Run ``Main`` on one HTML file, reporting failures instead of raising.

    Prints ``ERR <path> <error>`` when the file cannot be opened or when
    processing raises. Prints the path itself once a file that could be
    opened has been handled, whether or not processing succeeded.
    """
    # noinspection PyBroadException
    try:
        file = open(file_path, "r+")
    except Exception as err:
        print('ERR', file_path, err)
        return

    with file:
        # noinspection PyBroadException
        try:
            # Parse from this handle and rewind it so Main reads the raw text
            # from the start. A second handle opened only for the parser was
            # never closed.
            soup = BeautifulSoup(file, "html.parser")
            file.seek(0)
            Main(file, soup)
        except Exception as err:
            # Deliberately broad: one bad file must not stop a directory run.
            print('ERR', file_path, err)
    print(file_path)


# Script entry: a single path is processed as one HTML file, a directory is
# walked recursively.
if not os.path.isdir(argv_path):
    process_html_file(argv_path)
else:
    recurse(argv_path)
	import os
	import re
	import sys
	from typing import IO

	from bs4 import BeautifulSoup
	from bs4.element import Tag

	# Path to process: the sole command-line argument when exactly one is
	# given, otherwise the current working directory.
	argv_path = sys.argv[1] if len(sys.argv) == 2 else os.getcwd()


	class Replacement:
		"""A form control paired with the ``<label>`` assumed to describe it.

		Attributes:
			input: The tag that should carry an ``id``.
			label: The ``<label>`` tag that should point at ``input`` via ``for``.
			id: The control's existing ``id`` when it has a non-empty one,
				otherwise its ``name``.
		"""

		input: Tag = None
		label: Tag = None

		def __init__(self, input_tag: Tag, label_tag: Tag):
			"""Store both tags and derive the id shared by ``id`` and ``for``."""
			self.input = input_tag
			self.label = label_tag
			self.id = input_tag.get('id') or input_tag.get('name')

		@staticmethod
		def find_matching_label(input_tag: Tag, tags=None):
			"""Return the ``<label>`` listed directly before ``input_tag``.

			Args:
				input_tag: The tag to find a label for.
				tags: Sequence of tags to search. Defaults to
					``Main.all_label_and_input_tags``.

			Returns:
				The element preceding ``input_tag`` when that element is a
				``<label>``; ``None`` when ``input_tag`` is first in the
				sequence, is not in the sequence, or is preceded by a tag
				that is not a label.
			"""
			if tags is None:
				tags = Main.all_label_and_input_tags

			# Locate by identity. ``list.index`` compares with ``==`` and
			# could therefore return the position of an earlier tag that
			# merely compares equal to this one.
			position = next(
				(index for index, tag in enumerate(tags) if tag is input_tag),
				None)

			# Position 0 has no predecessor; subtracting 1 and indexing would
			# wrap around to the last element of the sequence.
			if position is None or position == 0:
				return None

			assumed_label = tags[position - 1]
			if assumed_label.name != 'label':
				return None
			return assumed_label

		@staticmethod
		def has_ng_model_and_name(tag: Tag) -> bool:
			"""Return whether ``tag`` has both an ``ng-model`` and a ``name`` attribute."""
			return tag.has_attr('ng-model') and tag.has_attr('name')

		def __str__(self) -> str:
			"""Return ``<tag>[ng-model=..., name=..., label=...]`` for debugging.

			Uses the first child of the label as its text; an empty label
			yields an empty ``label=`` field instead of raising.
			"""
			label_text = self.label.contents[0] if self.label.contents else ''
			# Formatting (rather than ``+``) tolerates a missing attribute or
			# a non-string first child.
			return "{}[ng-model={}, name={}, label={}]".format(
				self.input.name,
				self.input.get('ng-model'),
				self.input.get('name'),
				label_text)


	class Main:
		"""Insert missing ``id`` / ``for`` attributes into one HTML file.

		Constructing an instance does the whole job: label/control pairs are
		collected from the parsed document, the raw file text is patched with
		regular expressions, and the file is replaced if the text changed.
		"""

		# Every <label> plus every tag that has both ng-model and name, as
		# returned by the soup of the most recently processed file. Kept at
		# class level because Replacement.find_matching_label reads it.
		all_label_and_input_tags = []

		def __init__(self, file: IO, soup: BeautifulSoup):
			"""Patch the file behind ``file`` using its parsed form ``soup``.

			Args:
				file: Open text file; its remaining content is read and its
					``name`` is the path that gets replaced.
				soup: Parsed document used to find labels and controls.
			"""
			original_content = file.read()
			content = original_content
			Main.all_label_and_input_tags = soup(
				['label', Replacement.has_ng_model_and_name])

			replacements = []
			for tag in Main.all_label_and_input_tags:
				if tag.name == 'label':
					continue
				matching_label = Replacement.find_matching_label(tag)
				if matching_label is not None:
					replacements.append(Replacement(tag, matching_label))

			for replacement in replacements:
				# Tags whose name starts with "pkp" are left untouched.
				if replacement.input.name.startswith('pkp'):
					continue
				# So are controls whose direct parent is a <label>.
				if replacement.input.parent.name == 'label':
					continue

				if not replacement.input.has_attr('id'):
					content = Main.add_id_to_input_tag(content, replacement)
				if not replacement.label.has_attr('for'):
					content = Main.add_for_to_label_tag(content, replacement)

			# Rewrite only when a substitution actually altered the text. The
			# new text goes to "<name>.out" first and then replaces the
			# original.
			if content != original_content:
				out_path = file.name + '.out'
				with open(out_path, 'w') as out_file:
					out_file.write(content)
				os.replace(out_path, file.name)

		@staticmethod
		def add_id_to_input_tag(content: str, replacement: Replacement) -> str:
			"""Return ``content`` with ``id="<replacement.id>"`` added to the control.

			Every opening tag in ``content`` that has the control's tag name
			and double-quoted ``name`` / ``ng-model`` attributes equal to the
			control's is rewritten. The id is appended after the existing
			attributes and a self-closing ``/`` before ``>`` is dropped.
			Matching stops at the first ``>``, so tags with ``>`` inside an
			attribute value are not handled.
			"""
			tag_name = re.escape(replacement.input.name)
			name_value = re.escape(replacement.input.get('name'))
			model_value = re.escape(replacement.input.get('ng-model'))
			# Raw strings throughout, with the "*" quantifiers in place so
			# other attributes may appear before and between name/ng-model.
			pattern = (
				r'(<' + tag_name
				+ r'[^>]*?(?=[^>]*name="' + name_value + r'")'
				+ r'(?=[^>]*ng-model="' + model_value + r'")'
				+ r'[^>]*?)/?>'
			)
			# A callable replacement inserts the id literally; in a template
			# string any backslash in the id would be treated as an escape.
			return re.sub(
				pattern,
				lambda match: match.group(1) + ' id="' + replacement.id + '">',
				content)

		@staticmethod
		def add_for_to_label_tag(content: str, replacement: Replacement) -> str:
			"""Return ``content`` with ``for="<replacement.id>"`` added to the label.

			The label is located by its content: each child of the parsed
			label is rendered to text, ``>`` is treated as a separator, and
			the result is split on whitespace. Every ``<label>`` in
			``content`` that has no ``for`` attribute yet and contains all of
			those fragments before its closing tag gets the attribute
			inserted right after ``<label``. ``content`` is returned
			unchanged when the label yields no fragments, because it cannot
			be told apart from any other label.
			"""
			fragments = " ".join(
				str(child).replace('>', ' ')
				for child in replacement.label.contents
			).split()
			if not fragments:
				return content

			# Skip labels that already carry a for attribute so none ends up
			# with two of them.
			pattern = r'<label(?![^>]*\sfor\s*=)([^>]*?>'
			for fragment in fragments:
				# Each fragment must occur somewhere before the next </label>.
				pattern += r'(?=(?:(?!</label>).)*{})'.format(re.escape(fragment))
			pattern += r'.*?</label>)'

			return re.sub(
				re.compile(pattern, re.DOTALL),
				lambda match: '<label for="' + replacement.id + '"' + match.group(1),
				content)


	def recurse(path):
		"""Process every ``.html`` file found under the directory ``path``.

		Sub-directories are descended into as they are encountered. Nothing
		happens when ``path`` is not a directory.
		"""
		if not os.path.isdir(path):
			return

		for entry_name in os.listdir(path):
			entry_path = os.path.join(path, entry_name)

			if os.path.isdir(entry_path):
				recurse(entry_path)
			elif os.path.splitext(entry_name)[1] == '.html':
				process_html_file(entry_path)


	def process_html_file(file_path):
		"""Run ``Main`` on one HTML file, reporting failures instead of raising.

		Prints ``ERR <path> <error>`` when the file cannot be opened or when
		processing raises. Prints the path itself once a file that could be
		opened has been handled, whether or not processing succeeded.
		"""
		# noinspection PyBroadException
		try:
			file = open(file_path, "r+")
		except Exception as err:
			print('ERR', file_path, err)
			return

		with file:
			# noinspection PyBroadException
			try:
				# Parse from this handle and rewind it so Main reads the raw
				# text from the start. A second handle opened only for the
				# parser was never closed.
				soup = BeautifulSoup(file, "html.parser")
				file.seek(0)
				Main(file, soup)
			except Exception as err:
				# Deliberately broad: one bad file must not stop a directory
				# run.
				print('ERR', file_path, err)
		print(file_path)


	# Script entry: a directory is walked recursively, any other path is
	# processed as a single HTML file.
	if os.path.isdir(argv_path):
		recurse(argv_path)
	else:
		process_html_file(argv_path)