Created
May 20, 2024 04:58
-
-
Save WinslowJosiah/92874a5d136daafcc0f351bfc39cbe08 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import nullcontext | |
from purequransearch import Concordance, VALID_CORPORA | |
# Get corpus from user
corpus = input("Enter corpus (default pickthall): ") or "pickthall"
# Validate with an explicit exception rather than `assert`: assertions
# are removed when Python runs with -O, which would let an unknown
# corpus name slip through to the Concordance constructor.
if corpus not in VALID_CORPORA:
    raise ValueError(f"unknown corpus: {corpus!r}")
concordance = Concordance(corpus)

# Get verse from user, in the form "chapter:verse"
chapter, verse = map(int, input("Enter verse reference: ").split(":"))
# Both numbers are converted to 0-based indices below. Reject values
# under 1 here; otherwise the subtraction yields a negative index, which
# on a list-like container would silently select an entry counted from
# the end instead of reporting the bad reference.
if chapter < 1 or verse < 1:
    raise ValueError(
        f"chapter and verse must be positive, got {chapter}:{verse}"
    )
phrase = concordance.content[chapter - 1][verse - 1]
# Split the verse into words and normalize each one (keeping its case);
# these are the words that search_miracles() partitions into queries.
words = [
    Concordance.normalize_word(word, preserve_case=True)
    for word in Concordance.text_to_words(phrase)
]
print(f"{chapter}:{verse}\t{phrase}")

# Get other info from user
# Occurrence/verse total that a combination of queries must hit exactly
target = int(input("Enter target: "))
# Largest number of consecutive words allowed in a single query
max_phrase_length = int(
    input("Enter max words in phrase (default 8): ") or 8
)
# Largest total number of words that may be left out of the search
max_deleted_words = int(
    input("Enter max words to delete (default 4): ") or 4
)
# An empty answer means results are only printed, not written to a file
output_path = input("Enter output file path (default none): ")
def _search_totals(query, cache):
    """Return ``(occurrence count, verse set)`` for *query*, memoized.

    *cache* maps a query string to the totals already computed for it,
    so each distinct query hits ``concordance.search`` at most once per
    cache. Assumes the concordance returns the same result for the same
    query while a search is running (NOTE(review): confirm).
    """
    try:
        return cache[query]
    except KeyError:
        result = concordance.search(query)
        totals = (len(result), concordance.word_indices_to_verses(result))
        cache[query] = totals
        return totals


# NOTE This function performs a recursive search of every possible
# partition and combination of search types (which will be a lot!). It
# tries to cut off some branches of the search if the occurrence/verse
# totals get past the target, but if you're dealing with a long verse,
# going through every possibility will be slow.
def search_miracles(
    queries: list[str] | None = None,
    window_start: int = 0,
    occurrences: int = 0,
    verses: set[tuple[int, int]] | None = None,
    deleted_words: int = 0,
    _search_cache: dict | None = None,
):
    """Yield every query combination whose totals hit ``target``.

    The global ``words`` list is split into consecutive windows of at
    most ``max_phrase_length`` words. Each window is tried as a plain
    query, as a "^"-prefixed (case-sensitive) query, and as a
    "#"-prefixed deleted window that contributes nothing.

    Args:
        queries: Queries chosen so far for the words before
            ``window_start``; deleted windows are stored with a "#"
            prefix.
        window_start: Index in ``words`` where the next window begins.
        occurrences: Sum of the occurrence counts of ``queries``.
        verses: Union of the verse sets matched by ``queries``.
        deleted_words: Number of words deleted so far.
        _search_cache: Internal memo of search totals keyed by query
            string; callers should leave it unset.

    Yields:
        A report string for each complete partition whose occurrence
        total equals ``target``, and one for each whose verse total
        equals ``target``.

    Side effects:
        Increments the globals ``total_occurrences`` / ``total_verses``
        once per corresponding report.
    """
    global total_occurrences
    global total_verses

    # NOTE Setting these defaults here avoids some nasty bugs!
    if queries is None:
        queries = []
    if verses is None:
        verses = set()
    # One cache per top-level call: the same window is reached through
    # many different partitions of the words before it, and without the
    # cache it would be searched again every time.
    if _search_cache is None:
        _search_cache = {}

    # If occurrence/verse total is past the target, don't search deeper.
    # Both must have overshot: totals only grow, so a branch is hopeless
    # only when neither total can land on the target any more.
    if occurrences > target and len(verses) > target:
        return

    # If current partition uses the entire verse
    if window_start >= len(words):
        # Deleted windows ("#...") are not part of the reported queries
        queries_str = ";".join(
            query
            for query in queries
            if not query.startswith("#")
        )
        # Report occurrence/verse totals that hit the target. Counters
        # are bumped before yielding so they are accurate whenever the
        # consumer looks at them.
        if occurrences == target:
            total_occurrences += 1
            yield f"{queries_str}\n= {occurrences} occurrence(s)"
        if len(verses) == target:
            total_verses += 1
            yield f"{queries_str}\n= {len(verses)} verse(s)"
        return

    # Two deleted windows in a row would just be one longer deleted
    # window, so deletion is only allowed if the last query wasn't one.
    # This does not depend on the window, so compute it once.
    may_delete = not queries or not queries[-1].startswith("#")

    # For each possible "window" starting from this word, longest first
    for window_end in range(
        min(len(words), window_start + max_phrase_length),
        window_start,
        -1,
    ):
        window = words[window_start:window_end]
        # This window will correspond to a query we need to search
        query = " ".join(window)

        # Perform case-insensitive search
        # (cached verse sets are shared, so they are only ever combined
        # with `|`, never mutated in place)
        occ, vss = _search_totals(query, _search_cache)
        # Go deeper
        yield from search_miracles(
            queries + [query],
            window_end,
            occurrences + occ,
            verses | vss,
            deleted_words,
            _search_cache,
        )

        # Perform case-sensitive search
        occ_case, vss_case = _search_totals("^" + query, _search_cache)
        # Only go deeper if the occurrence/verse totals are different
        if not (occ_case == occ and vss_case == vss):
            yield from search_miracles(
                queries + ["^" + query],
                window_end,
                occurrences + occ_case,
                verses | vss_case,
                deleted_words,
                _search_cache,
            )

        # Perform no search; a deleted word will give no results.
        # Only go deeper if the last query wasn't deleted and we're
        # still under the max number of deleted words.
        if may_delete and deleted_words + len(window) <= max_deleted_words:
            yield from search_miracles(
                queries + ["#" + query],
                window_end,
                occurrences,
                verses,
                deleted_words + len(window),
                _search_cache,
            )
print("Searching...")
# If an output path was specified, open that file for writing; otherwise
# use nullcontext(), whose `as` target is None, so the same `with` block
# serves both cases. The encoding is given explicitly so the output does
# not depend on the platform's default encoding and non-ASCII verse text
# can always be written.
with (
    open(output_path, "w", encoding="utf-8")
    if output_path
    else nullcontext()
) as outfile:
    # Counters updated by search_miracles() through `global`; they must
    # exist before the generator starts running.
    total_occurrences = 0
    total_verses = 0
    for miracle in search_miracles():
        print(miracle)
        # outfile is None exactly when no output path was given
        if outfile is not None:
            outfile.write(miracle + "\n")
        # NOTE(review): the blank line is assumed to separate results on
        # the console (i.e. to sit inside the loop) -- confirm against
        # the original indentation.
        print()

print("Done.")
print(
    f"Found {total_occurrences} occurrence miracle(s) and "
    f"{total_verses} verse-count miracle(s)."
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment