# WinslowJosiah/quran-partition-miracles.py

## quran-partition-miracles.py
from contextlib import nullcontext

from purequransearch import Concordance, VALID_CORPORA

# Get corpus from user
corpus = input("Enter corpus (default pickthall): ") or "pickthall"
# Validate explicitly rather than with `assert`: assertions are removed
# when Python runs with -O, which would let an unknown corpus through.
if corpus not in VALID_CORPORA:
    raise SystemExit(f"Unknown corpus: {corpus!r}")
concordance = Concordance(corpus)

# Get verse from user (expected form: CHAPTER:VERSE, both 1-based)
reference = input("Enter verse reference: ")
try:
    chapter, verse = map(int, reference.split(":"))
except ValueError:
    # Raised for non-numeric parts and for anything other than two parts.
    raise SystemExit(
        f"Invalid verse reference {reference!r}; expected CHAPTER:VERSE"
    ) from None
# Reject numbers below 1 up front: after subtracting 1 they would become
# negative indices and silently select a verse counted from the end.
if chapter < 1 or verse < 1:
    raise SystemExit("Chapter and verse numbers start at 1")
try:
    phrase = concordance.content[chapter - 1][verse - 1]
except IndexError:
    raise SystemExit(f"No such verse: {chapter}:{verse}") from None
# Split the verse into normalized words, keeping their original case so
# that case-sensitive searches remain possible later on.
words = [
    Concordance.normalize_word(word, preserve_case=True)
    for word in Concordance.text_to_words(phrase)
]
print(f"{chapter}:{verse}\t{phrase}")

# Get other info from user
# target: the occurrence/verse total a partition must hit to be reported
target = int(input("Enter target: "))
# Upper bound on the number of words in a single query window
max_phrase_length = int(
    input("Enter max words in phrase (default 8): ") or 8
)
# Upper bound on the total number of words that may be left unsearched
max_deleted_words = int(
    input("Enter max words to delete (default 4): ") or 4
)
# An empty answer means "do not write an output file"
output_path = input("Enter output file path (default none): ")

# NOTE This function performs a recursive search of every possible
# partition and combination of search types (which will be a lot!). It
# tries to cut off some branches of the search if the occurrence/verse
# totals get past the target, but if you're dealing with a long verse,
# going through every possibility will be slow.

# Results of searches already performed, keyed by the exact query string
# (including any "^" prefix). The same window of words is reached through
# many different partitions of the preceding words, so without this cache
# the identical search would be repeated once per partition.
_search_cache: dict[str, tuple[int, set[tuple[int, int]]]] = {}


def _search_totals(query: str) -> tuple[int, set[tuple[int, int]]]:
    """Return (occurrence count, verses) for *query*, searching only once.

    The verses value is whatever ``concordance.word_indices_to_verses``
    returns; it is annotated as a set of int pairs to match the ``verses``
    parameter of ``search_miracles`` (presumably (chapter, verse) --
    confirm against the library). The cached set is shared between
    callers and must not be mutated.
    """
    totals = _search_cache.get(query)
    if totals is None:
        result = concordance.search(query)
        totals = (len(result), concordance.word_indices_to_verses(result))
        _search_cache[query] = totals
    return totals


def search_miracles(
    queries: list[str] | None = None,
    window_start: int = 0,
    occurrences: int = 0,
    verses: set[tuple[int, int]] | None = None,
    deleted_words: int = 0,
):
    """Yield a report for every partition of the verse that hits the target.

    The words of the verse (module-level ``words``) are split into
    consecutive windows of at most ``max_phrase_length`` words. Each
    window is either searched as-is, searched with a "^" prefix (the
    case-sensitive variant), or "deleted" (not searched at all, marked
    with a "#" prefix). A report is yielded whenever a complete partition
    has a summed occurrence count, or a number of distinct verses, equal
    to ``target``.

    All arguments describe the partial partition built so far; top-level
    callers are expected to use the defaults.

    Args:
        queries: Queries chosen for the windows so far, in order.
        window_start: Index in ``words`` where the next window begins.
        occurrences: Sum of the occurrence counts of ``queries``.
        verses: Union of the verses matched by ``queries``.
        deleted_words: Number of words in deleted windows so far.

    Yields:
        A string with the ";"-joined searched queries (deleted windows
        omitted) on the first line and the matching total on the second.

    Side effects:
        Increments the module-level ``total_occurrences`` and
        ``total_verses`` counters, one per report of the respective kind.
        Each increment runs when the generator is resumed after the
        corresponding yield.
    """
    global total_occurrences
    global total_verses

    # NOTE Setting these defaults here avoids some nasty bugs!
    # (Mutable default arguments would be shared between calls.)
    if queries is None:
        queries = []
    if verses is None:
        verses = set()

    # Both totals only ever grow as more windows are added, so once BOTH
    # are past the target nothing below this point can be reported.
    if occurrences > target and len(verses) > target:
        return

    # If current partition uses the entire verse
    if window_start >= len(words):
        # Deleted windows were never searched, so leave them out of the
        # reported query string.
        queries_str = ";".join(
            query
            for query in queries
            if not query.startswith("#")
        )

        # Report occurrence/verse totals that hit the target
        if occurrences == target:
            yield f"{queries_str}\n= {occurrences} occurrence(s)"
            total_occurrences += 1
        if len(verses) == target:
            yield f"{queries_str}\n= {len(verses)} verse(s)"
            total_verses += 1
        return

    # For each possible "window" starting from this word, longest first
    longest_end = min(len(words), window_start + max_phrase_length)
    for window_end in range(longest_end, window_start, -1):
        window = words[window_start:window_end]
        # This window will correspond to a query we need to search
        query = " ".join(window)

        # Perform case-insensitive search
        occ, vss = _search_totals(query)
        # Go deeper
        yield from search_miracles(
            queries + [query],
            window_end,
            occurrences + occ,
            verses | vss,
            deleted_words,
        )

        # Perform case-sensitive search
        occ_case, vss_case = _search_totals("^" + query)
        # Only go deeper if the occurrence/verse totals are different;
        # otherwise every report below would duplicate one found above.
        if occ_case != occ or vss_case != vss:
            yield from search_miracles(
                queries + ["^" + query],
                window_end,
                occurrences + occ_case,
                verses | vss_case,
                deleted_words,
            )

        # Perform no search; a deleted word will give no results
        # Only go deeper if the last query wasn't deleted (two adjacent
        # deleted windows are equivalent to one longer deleted window)...
        if queries and queries[-1].startswith("#"):
            continue
        # ...and we're still under the max number of deleted words
        if deleted_words + len(window) <= max_deleted_words:
            yield from search_miracles(
                queries + ["#" + query],
                window_end,
                occurrences,
                verses,
                deleted_words + len(window),
            )

print("Searching...")
# The output file is optional. When no path was given, nullcontext()
# stands in for the file and binds outfile to None, so the loop below
# only has to check outfile itself.
# The encoding is fixed to UTF-8 so that writing does not depend on the
# platform's default encoding (corpus text may contain non-ASCII
# characters -- NOTE(review): confirm for the supported corpora).
with (
    open(output_path, "w", encoding="utf-8")
    if output_path
    else nullcontext()
) as outfile:
    # Counters updated by search_miracles() through `global`
    total_occurrences = 0
    total_verses = 0
    for miracle in search_miracles():
        print(miracle)
        if outfile is not None:
            outfile.write(miracle + "\n")

    print()
    print("Done.")
    print(
        f"Found {total_occurrences} occurrence miracle(s) and "
        f"{total_verses} verse-count miracle(s)."
    )
	from contextlib import nullcontext

	from purequransearch import Concordance, VALID_CORPORA

	# Get corpus from user
	corpus = input("Enter corpus (default pickthall): ") or "pickthall"
	assert corpus in VALID_CORPORA
	concordance = Concordance(corpus)

	# Get verse from user
	chapter, verse = map(int, input("Enter verse reference: ").split(":"))
	phrase = concordance.content[chapter - 1][verse - 1]
	words = [
	Concordance.normalize_word(word, preserve_case=True)
	for word in Concordance.text_to_words(phrase)
	]
	print(f"{chapter}:{verse}\t{phrase}")

	# Get other info from user
	target = int(input("Enter target: "))
	max_phrase_length = int(
	input("Enter max words in phrase (default 8): ") or 8
	)
	max_deleted_words = int(
	input("Enter max words to delete (default 4): ") or 4
	)
	output_path = input("Enter output file path (default none): ")

	# NOTE This function performs a recursive search of every possible
	# partition and combination of search types (which will be a lot!). It
	# tries to cut off some branches of the search if the occurrence/verse
	# totals get past the target, but if you're dealing with a long verse,
	# going through every possibility will be slow.
	def search_miracles(
	queries: list[str] \| None = None,
	window_start: int = 0,
	occurrences: int = 0,
	verses: set[tuple[int, int]] \| None = None,
	deleted_words: int = 0,
	):
	global total_occurrences
	global total_verses

	# NOTE Setting these defaults here avoids some nasty bugs!
	if queries is None:
	queries = []
	if verses is None:
	verses = set()

	# If occurrence/verse total is past the target, don't search deeper
	if occurrences > target and len(verses) > target:
	return

	# If current partition uses the entire verse
	if window_start >= len(words):
	queries_str = ";".join(
	query
	for query in queries
	if not query.startswith("#")
	)

	# Report occurrence/verse totals that hit the target
	if occurrences == target:
	yield f"{queries_str}\n= {occurrences} occurrence(s)"
	total_occurrences += 1
	if len(verses) == target:
	yield f"{queries_str}\n= {len(verses)} verse(s)"
	total_verses += 1
	return

	# For each possible "window" starting from this word
	for window_end in range(
	min(len(words), window_start + max_phrase_length),
	window_start,
	-1,
	):
	window = words[window_start:window_end]
	# This window will correspond to a query we need to search
	query = " ".join(window)

	# Perform case-insensitive search
	result = concordance.search(query)
	occ = len(result)
	vss = concordance.word_indices_to_verses(result)
	# Go deeper
	yield from search_miracles(
	queries + [query],
	window_end,
	occurrences + occ,
	verses \| vss,
	deleted_words,
	)

	# Perform case-sensitive search
	result_case = concordance.search("^" + query)
	occ_case = len(result_case)
	vss_case = concordance.word_indices_to_verses(result_case)
	# Only go deeper if the occurrence/verse totals are different
	if not (occ_case == occ and vss_case == vss):
	yield from search_miracles(
	queries + ["^" + query],
	window_end,
	occurrences + occ_case,
	verses \| vss_case,
	deleted_words,
	)

	# Perform no search; a deleted word will give no results
	# Only go deeper if the last query wasn't deleted...
	if (not queries or not queries[-1].startswith("#")):
	# ...and we're still under the max number of deleted words
	if deleted_words + len(window) <= max_deleted_words:
	yield from search_miracles(
	queries + ["#" + query],
	window_end,
	occurrences,
	verses,
	deleted_words + len(window),
	)

	print("Searching...")
	# HACK If an output path was specified, I open a file and write to it;
	# otherwise, I don't open a file, and I don't write to it. There might
	# be a better way to do this, though.
	with open(output_path, "w") if output_path else nullcontext() as outfile:
	total_occurrences = 0
	total_verses = 0
	for miracle in search_miracles():
	print(miracle)
	if output_path:
	assert outfile is not None
	outfile.write(miracle + "\n")

	print()
	print("Done.")
	print(
	f"Found {total_occurrences} occurrence miracle(s) and "
	f"{total_verses} verse-count miracle(s)."
	)